diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7697 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0926472194908774, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.1875, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 9.6809, + "eval_exact_match_for_answerability_classification": 17.7692, + "eval_exact_match_for_cause_effect_classification": 0.1429, + "eval_exact_match_for_coreference_resolution": 13.6429, + "eval_exact_match_for_data_to_text": 2.1792, + "eval_exact_match_for_dialogue_act_recognition": 22.4286, + "eval_exact_match_for_grammar_error_correction": 4.0, + "eval_exact_match_for_keyword_tagging": 14.4, + "eval_exact_match_for_overlap_extraction": 4.5, + "eval_exact_match_for_question_rewriting": 0.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 35.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 7.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 1.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 9.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 40.0, + "eval_exact_match_for_task102_commongen_data_to_text": 3.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 0.0, + "eval_exact_match_for_task1155_bard_word_analogy": 0.0, + "eval_exact_match_for_task1156_bard_word_analogy": 5.0, + "eval_exact_match_for_task1157_bard_word_analogy": 0.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 4.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 44.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 5.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 1.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 25.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 0.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 9.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 0.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 8.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 28.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 4.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 22.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 3.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 22.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 75.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 44.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 19.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 1.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 8.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 8.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 2.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 8.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 1.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 49.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 46.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 49.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 6.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 14.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 26.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 33.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 6.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 12.875, + "eval_exact_match_for_title_generation": 7.9036, + "eval_exact_match_for_word_analogy": 1.375, + "eval_f1": 26.8473, + "eval_f1_for_answerability_classification": 18.7926, + "eval_f1_for_cause_effect_classification": 22.0441, + "eval_f1_for_coreference_resolution": 22.0192, + "eval_f1_for_data_to_text": 50.8769, + "eval_f1_for_dialogue_act_recognition": 25.44, + "eval_f1_for_grammar_error_correction": 53.4525, + "eval_f1_for_keyword_tagging": 29.8915, + "eval_f1_for_overlap_extraction": 31.4115, + "eval_f1_for_question_rewriting": 59.7762, + "eval_f1_for_task020_mctaco_answerability_classification": 35.1257, + "eval_f1_for_task033_winogrande_coreference_resolution": 7.0, + "eval_f1_for_task034_winogrande_question_rewriting": 65.8388, + "eval_f1_for_task035_winogrande_question_rewriting": 70.0901, + "eval_f1_for_task036_qasc_keyword_tagging": 45.6515, + "eval_f1_for_task039_qasc_overlap_extraction": 18.1698, + "eval_f1_for_task050_multirc_answerability_classification": 40.0, + "eval_f1_for_task102_commongen_data_to_text": 61.7543, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 2.6667, + "eval_f1_for_task1154_bard_word_analogy": 0.0, + "eval_f1_for_task1155_bard_word_analogy": 0.0, + "eval_f1_for_task1156_bard_word_analogy": 5.6667, + "eval_f1_for_task1157_bard_word_analogy": 0.0, + "eval_f1_for_task1158_bard_word_analogy": 0.0, + "eval_f1_for_task1159_bard_word_analogy": 4.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.4485, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 71.5192, + "eval_f1_for_task121_atomic_question_rewriting": 47.9722, + "eval_f1_for_task133_winowhy_coreference_resolution": 44.3527, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.4092, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 31.0146, + "eval_f1_for_task1356_xlsum_title_generation": 10.5431, + "eval_f1_for_task1358_xlsum_title_generation": 31.9612, + "eval_f1_for_task1385_anli_textual_entailment": 19.6705, + "eval_f1_for_task1386_anli_textual_entailment": 3.0615, + "eval_f1_for_task1387_anli_textual_entailment": 11.8377, + "eval_f1_for_task1388_cb_textual_entailment": 8.219, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, + "eval_f1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 1.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 1.2264, + "eval_f1_for_task1407_dart_data_to_text": 41.147, + "eval_f1_for_task1409_dart_data_to_text": 53.273, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6352, + "eval_f1_for_task1439_doqa_answerability_classification": 0.6086, + "eval_f1_for_task1442_doqa_answerability_classification": 2.4198, + "eval_f1_for_task1516_imppres_textual_entailment": 25.5689, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 2.9674, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 9.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 4.5, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0755, + "eval_f1_for_task1540_peer_read_title_generation": 11.2864, + "eval_f1_for_task1554_scitail_textual_entailment": 2.7532, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 77.2699, + "eval_f1_for_task1562_zest_question_rewriting": 57.2093, + "eval_f1_for_task1586_scifact_title_generation": 27.5331, + "eval_f1_for_task1598_nyc_data_to_text": 53.4539, + "eval_f1_for_task1612_sick_textual_entailment": 3.146, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 72.2277, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 28.4934, + "eval_f1_for_task1631_open_pi_data_to_text": 63.1483, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 22.0256, + "eval_f1_for_task1659_billsum_title_generation": 19.7515, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 48.2333, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.6622, + "eval_f1_for_task190_snli_textual_entailment": 2.4744, + "eval_f1_for_task199_multinli_textual_entailment": 27.0, + "eval_f1_for_task200_multinli_textual_entailment": 25.0, + "eval_f1_for_task201_multinli_textual_entailment": 23.204, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 18.3193, + "eval_f1_for_task220_rocstories_title_generation": 75.0803, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.4124, + "eval_f1_for_task232_iirc_answerability_classification": 2.9836, + "eval_f1_for_task233_iirc_answerability_classification": 1.2321, + "eval_f1_for_task242_tweetqa_answerability_classification": 44.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 27.8929, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.6531, + "eval_f1_for_task288_gigaword_title_generation": 29.653, + "eval_f1_for_task290_tellmewhy_answerability_classification": 4.5868, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.8957, + "eval_f1_for_task329_gap_coreference_resolution": 32.3, + "eval_f1_for_task330_gap_coreference_resolution": 15.9333, + "eval_f1_for_task349_squad2.0_answerability_classification": 8.2353, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 40.2778, + "eval_f1_for_task391_cod3s_cause_effect_classification": 37.3, + "eval_f1_for_task392_cod3s_cause_effect_classification": 23.1896, + "eval_f1_for_task393_cod3s_cause_effect_classification": 23.9656, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 7.246, + "eval_f1_for_task402_grailqa_question_rewriting": 57.8297, + "eval_f1_for_task418_persent_title_generation": 15.8731, + "eval_f1_for_task442_com_qa_question_rewriting": 59.4344, + "eval_f1_for_task500_scruples_title_generation": 14.4384, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.2205, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.1804, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.4757, + "eval_f1_for_task602_wikitext_title_generation": 5.6369, + "eval_f1_for_task613_liar_keyword_tagging": 14.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 38.4533, + "eval_f1_for_task619_ohsumed_title_generation": 37.0978, + "eval_f1_for_task620_ohsumed_keyword_tagging": 16.7534, + "eval_f1_for_task623_ohsumed_keyword_tagging": 2.5712, + "eval_f1_for_task640_e_snli_textual_entailment": 1.8613, + "eval_f1_for_task641_e_snli_textual_entailment": 0.25, + "eval_f1_for_task642_e_snli_textual_entailment": 20.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 69.648, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.4152, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.8116, + "eval_f1_for_task671_ambigqa_question_rewriting": 51.5913, + "eval_f1_for_task677_ollie_data_to_text": 32.3577, + "eval_f1_for_task738_perspectrum_textual_entailment": 46.6667, + "eval_f1_for_task743_eurlex_title_generation": 21.4068, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.7287, + "eval_f1_for_task769_qed_title_generation": 66.848, + "eval_f1_for_task827_copa_cause_effect_classification": 0.0, + "eval_f1_for_task828_copa_cause_effect_classification": 30.4, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_f1_for_task890_gwsd_textual_entailment": 7.4124, + "eval_f1_for_task891_gap_coreference_resolution": 39.6667, + "eval_f1_for_task892_gap_coreference_resolution": 14.0, + "eval_f1_for_task893_gap_coreference_resolution": 26.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 34.3333, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 17.4606, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 6.2859, + "eval_f1_for_task957_e2e_data_to_text": 52.4371, + "eval_f1_for_task970_sherliic_textual_entailment": 0.0, + "eval_f1_for_textual_entailment": 16.8822, + "eval_f1_for_title_generation": 27.9193, + "eval_f1_for_word_analogy": 1.5417, + "eval_gen_len": 36.6346, + "eval_global_step": 1, + "eval_loss": 5.386216163635254, + "eval_rouge1": 29.221, + "eval_rouge1_for_answerability_classification": 18.7741, + "eval_rouge1_for_cause_effect_classification": 27.801, + "eval_rouge1_for_coreference_resolution": 22.3337, + "eval_rouge1_for_data_to_text": 53.5713, + "eval_rouge1_for_dialogue_act_recognition": 26.7607, + "eval_rouge1_for_grammar_error_correction": 58.489, + "eval_rouge1_for_keyword_tagging": 34.029, + "eval_rouge1_for_overlap_extraction": 33.561, + "eval_rouge1_for_question_rewriting": 61.5221, + "eval_rouge1_for_task020_mctaco_answerability_classification": 35.1247, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 7.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 65.9167, + "eval_rouge1_for_task035_winogrande_question_rewriting": 70.7371, + "eval_rouge1_for_task036_qasc_keyword_tagging": 52.7489, + "eval_rouge1_for_task039_qasc_overlap_extraction": 21.6976, + "eval_rouge1_for_task050_multirc_answerability_classification": 40.0, + "eval_rouge1_for_task102_commongen_data_to_text": 73.5415, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 0.0, + "eval_rouge1_for_task1155_bard_word_analogy": 0.0, + "eval_rouge1_for_task1156_bard_word_analogy": 5.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 0.0, + "eval_rouge1_for_task1158_bard_word_analogy": 0.0, + "eval_rouge1_for_task1159_bard_word_analogy": 4.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.0522, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 72.5426, + "eval_rouge1_for_task121_atomic_question_rewriting": 50.3881, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 44.3519, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2059, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 34.4477, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.3458, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.9829, + "eval_rouge1_for_task1385_anli_textual_entailment": 19.6705, + "eval_rouge1_for_task1386_anli_textual_entailment": 3.057, + "eval_rouge1_for_task1387_anli_textual_entailment": 11.7124, + "eval_rouge1_for_task1388_cb_textual_entailment": 8.146, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 1.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.8192, + "eval_rouge1_for_task1407_dart_data_to_text": 41.7377, + "eval_rouge1_for_task1409_dart_data_to_text": 55.2234, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3096, + "eval_rouge1_for_task1439_doqa_answerability_classification": 0.5802, + "eval_rouge1_for_task1442_doqa_answerability_classification": 2.2885, + "eval_rouge1_for_task1516_imppres_textual_entailment": 25.5236, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 2.9616, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 9.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 4.5, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0615, + "eval_rouge1_for_task1540_peer_read_title_generation": 12.8334, + "eval_rouge1_for_task1554_scitail_textual_entailment": 2.742, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 80.6684, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.8613, + "eval_rouge1_for_task1586_scifact_title_generation": 30.1217, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.342, + "eval_rouge1_for_task1612_sick_textual_entailment": 2.9018, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.9947, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 28.487, + "eval_rouge1_for_task1631_open_pi_data_to_text": 63.3477, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 22.0256, + "eval_rouge1_for_task1659_billsum_title_generation": 20.6854, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 48.2333, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.7445, + "eval_rouge1_for_task190_snli_textual_entailment": 2.4653, + "eval_rouge1_for_task199_multinli_textual_entailment": 27.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 23.1438, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.7494, + "eval_rouge1_for_task220_rocstories_title_generation": 75.0803, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.3896, + "eval_rouge1_for_task232_iirc_answerability_classification": 2.9714, + "eval_rouge1_for_task233_iirc_answerability_classification": 1.2126, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 44.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 28.4881, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.4243, + "eval_rouge1_for_task288_gigaword_title_generation": 32.0391, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 4.5727, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.5741, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.2917, + "eval_rouge1_for_task330_gap_coreference_resolution": 15.9333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 8.2324, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 40.2778, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 37.3, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 23.1896, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 24.4822, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 8.4077, + "eval_rouge1_for_task402_grailqa_question_rewriting": 59.6859, + "eval_rouge1_for_task418_persent_title_generation": 19.4105, + "eval_rouge1_for_task442_com_qa_question_rewriting": 62.9625, + "eval_rouge1_for_task500_scruples_title_generation": 16.0428, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.6988, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.1786, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.0994, + "eval_rouge1_for_task602_wikitext_title_generation": 6.3047, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.9021, + "eval_rouge1_for_task619_ohsumed_title_generation": 39.8197, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 19.7677, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 2.5283, + "eval_rouge1_for_task640_e_snli_textual_entailment": 1.8571, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.25, + "eval_rouge1_for_task642_e_snli_textual_entailment": 20.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 71.1004, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 24.3916, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.7524, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 52.4544, + "eval_rouge1_for_task677_ollie_data_to_text": 35.0477, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 46.6667, + "eval_rouge1_for_task743_eurlex_title_generation": 22.6371, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.3794, + "eval_rouge1_for_task769_qed_title_generation": 67.3846, + "eval_rouge1_for_task827_copa_cause_effect_classification": 33.3333, + "eval_rouge1_for_task828_copa_cause_effect_classification": 30.4, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 7.4124, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.6667, + "eval_rouge1_for_task892_gap_coreference_resolution": 14.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 26.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 37.3333, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 45.4584, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 38.2781, + "eval_rouge1_for_task957_e2e_data_to_text": 53.336, + "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, + "eval_rouge1_for_textual_entailment": 21.6769, + "eval_rouge1_for_title_generation": 30.016, + "eval_rouge1_for_word_analogy": 1.5417, + "eval_rougeL": 27.7552, + "eval_rougeL_for_answerability_classification": 18.7741, + "eval_rougeL_for_cause_effect_classification": 26.6129, + "eval_rougeL_for_coreference_resolution": 22.075, + "eval_rougeL_for_data_to_text": 46.0381, + "eval_rougeL_for_dialogue_act_recognition": 26.7508, + "eval_rougeL_for_grammar_error_correction": 57.621, + "eval_rougeL_for_keyword_tagging": 32.9357, + "eval_rougeL_for_overlap_extraction": 32.9762, + "eval_rougeL_for_question_rewriting": 57.3623, + "eval_rougeL_for_task020_mctaco_answerability_classification": 35.1247, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 7.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 65.2165, + "eval_rougeL_for_task035_winogrande_question_rewriting": 66.8884, + "eval_rougeL_for_task036_qasc_keyword_tagging": 50.123, + "eval_rougeL_for_task039_qasc_overlap_extraction": 21.6976, + "eval_rougeL_for_task050_multirc_answerability_classification": 40.0, + "eval_rougeL_for_task102_commongen_data_to_text": 65.5943, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 0.0, + "eval_rougeL_for_task1155_bard_word_analogy": 0.0, + "eval_rougeL_for_task1156_bard_word_analogy": 5.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 0.0, + "eval_rougeL_for_task1158_bard_word_analogy": 0.0, + "eval_rougeL_for_task1159_bard_word_analogy": 4.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.7961, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 70.099, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.0941, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 44.3519, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.198, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 30.1515, + "eval_rougeL_for_task1356_xlsum_title_generation": 11.263, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.8987, + "eval_rougeL_for_task1385_anli_textual_entailment": 19.6705, + "eval_rougeL_for_task1386_anli_textual_entailment": 3.057, + "eval_rougeL_for_task1387_anli_textual_entailment": 11.7124, + "eval_rougeL_for_task1388_cb_textual_entailment": 8.146, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 1.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.7494, + "eval_rougeL_for_task1407_dart_data_to_text": 37.8486, + "eval_rougeL_for_task1409_dart_data_to_text": 46.5167, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9619, + "eval_rougeL_for_task1439_doqa_answerability_classification": 0.5802, + "eval_rougeL_for_task1442_doqa_answerability_classification": 2.2885, + "eval_rougeL_for_task1516_imppres_textual_entailment": 25.5236, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 2.9616, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 9.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 4.5, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0615, + "eval_rougeL_for_task1540_peer_read_title_generation": 11.0377, + "eval_rougeL_for_task1554_scitail_textual_entailment": 2.742, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 79.2801, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.8325, + "eval_rougeL_for_task1586_scifact_title_generation": 24.4521, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.7464, + "eval_rougeL_for_task1612_sick_textual_entailment": 2.9018, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 72.2453, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 28.487, + "eval_rougeL_for_task1631_open_pi_data_to_text": 61.961, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 22.0256, + "eval_rougeL_for_task1659_billsum_title_generation": 17.2232, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 45.0429, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.5739, + "eval_rougeL_for_task190_snli_textual_entailment": 2.4653, + "eval_rougeL_for_task199_multinli_textual_entailment": 27.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 23.1438, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.5272, + "eval_rougeL_for_task220_rocstories_title_generation": 75.0803, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.3896, + "eval_rougeL_for_task232_iirc_answerability_classification": 2.9714, + "eval_rougeL_for_task233_iirc_answerability_classification": 1.2126, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 44.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 28.2024, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.2548, + "eval_rougeL_for_task288_gigaword_title_generation": 28.4022, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 4.5727, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.5741, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.2917, + "eval_rougeL_for_task330_gap_coreference_resolution": 15.9333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 8.2324, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 40.2778, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 37.3, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 23.1896, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 22.3364, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 8.4077, + "eval_rougeL_for_task402_grailqa_question_rewriting": 51.2002, + "eval_rougeL_for_task418_persent_title_generation": 17.0545, + "eval_rougeL_for_task442_com_qa_question_rewriting": 55.7407, + "eval_rougeL_for_task500_scruples_title_generation": 14.1112, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.1171, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.1786, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.3899, + "eval_rougeL_for_task602_wikitext_title_generation": 6.22, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 38.7309, + "eval_rougeL_for_task619_ohsumed_title_generation": 34.8616, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 19.1267, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 2.5283, + "eval_rougeL_for_task640_e_snli_textual_entailment": 1.8571, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.25, + "eval_rougeL_for_task642_e_snli_textual_entailment": 20.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 68.9004, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 24.2455, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.6867, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 50.8308, + "eval_rougeL_for_task677_ollie_data_to_text": 28.8542, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 46.6667, + "eval_rougeL_for_task743_eurlex_title_generation": 19.2719, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.2901, + "eval_rougeL_for_task769_qed_title_generation": 67.3637, + "eval_rougeL_for_task827_copa_cause_effect_classification": 33.3333, + "eval_rougeL_for_task828_copa_cause_effect_classification": 30.4, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 7.4124, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.6667, + "eval_rougeL_for_task892_gap_coreference_resolution": 14.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 26.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 37.3333, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 45.4584, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 38.2781, + "eval_rougeL_for_task957_e2e_data_to_text": 42.2841, + "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, + "eval_rougeL_for_textual_entailment": 21.6769, + "eval_rougeL_for_title_generation": 27.4256, + "eval_rougeL_for_word_analogy": 1.5417, + "eval_runtime": 2851.9569, + "eval_samples_per_second": 4.176, + "eval_steps_per_second": 0.261, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.7618, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 29.2024, + "eval_exact_match_for_answerability_classification": 49.8462, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 38.2857, + "eval_exact_match_for_data_to_text": 7.6271, + "eval_exact_match_for_dialogue_act_recognition": 45.4286, + "eval_exact_match_for_grammar_error_correction": 9.0, + "eval_exact_match_for_keyword_tagging": 43.4, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 1.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 21.0, + "eval_exact_match_for_task1153_bard_word_analogy": 25.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 46.0, + "eval_exact_match_for_task1157_bard_word_analogy": 55.0, + "eval_exact_match_for_task1158_bard_word_analogy": 23.0, + "eval_exact_match_for_task1159_bard_word_analogy": 17.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 18.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 56.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 30.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 7.0, + "eval_exact_match_for_task220_rocstories_title_generation": 75.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 2.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 8.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 25.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 11.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 76.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 45.0, + "eval_exact_match_for_textual_entailment": 39.2917, + "eval_exact_match_for_title_generation": 10.7063, + "eval_exact_match_for_word_analogy": 31.625, + "eval_f1": 46.0933, + "eval_f1_for_answerability_classification": 52.4103, + "eval_f1_for_cause_effect_classification": 55.7142, + "eval_f1_for_coreference_resolution": 45.1074, + "eval_f1_for_data_to_text": 53.8438, + "eval_f1_for_dialogue_act_recognition": 49.0, + "eval_f1_for_grammar_error_correction": 56.9407, + "eval_f1_for_keyword_tagging": 55.3024, + "eval_f1_for_overlap_extraction": 34.4972, + "eval_f1_for_question_rewriting": 66.5207, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_f1_for_task034_winogrande_question_rewriting": 72.5511, + "eval_f1_for_task035_winogrande_question_rewriting": 83.3835, + "eval_f1_for_task036_qasc_keyword_tagging": 70.7167, + "eval_f1_for_task039_qasc_overlap_extraction": 24.3333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 56.0761, + "eval_f1_for_task1152_bard_word_analogy": 21.0, + "eval_f1_for_task1153_bard_word_analogy": 25.0, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 46.0, + "eval_f1_for_task1157_bard_word_analogy": 55.0, + "eval_f1_for_task1158_bard_word_analogy": 23.0, + "eval_f1_for_task1159_bard_word_analogy": 17.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.7181, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.1539, + "eval_f1_for_task121_atomic_question_rewriting": 49.0983, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.4245, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.5067, + "eval_f1_for_task1356_xlsum_title_generation": 14.2003, + "eval_f1_for_task1358_xlsum_title_generation": 35.9439, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 57.0, + "eval_f1_for_task1407_dart_data_to_text": 40.7465, + "eval_f1_for_task1409_dart_data_to_text": 53.2023, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.3148, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.7754, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5666, + "eval_f1_for_task1562_zest_question_rewriting": 50.1974, + "eval_f1_for_task1586_scifact_title_generation": 31.1942, + "eval_f1_for_task1598_nyc_data_to_text": 47.8929, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.9193, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.9709, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 37.7866, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.9524, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.9505, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 19.6216, + "eval_f1_for_task220_rocstories_title_generation": 75.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 51.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.2048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.661, + "eval_f1_for_task288_gigaword_title_generation": 32.4417, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 9.3333, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.0905, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.5551, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 82.319, + "eval_f1_for_task418_persent_title_generation": 25.4611, + "eval_f1_for_task442_com_qa_question_rewriting": 68.6107, + "eval_f1_for_task500_scruples_title_generation": 18.7237, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9776, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.7596, + "eval_f1_for_task602_wikitext_title_generation": 13.2755, + "eval_f1_for_task613_liar_keyword_tagging": 18.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 40.7774, + "eval_f1_for_task619_ohsumed_title_generation": 37.9964, + "eval_f1_for_task620_ohsumed_keyword_tagging": 46.0, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 48.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.1286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 28.7556, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.5278, + "eval_f1_for_task671_ambigqa_question_rewriting": 58.4596, + "eval_f1_for_task677_ollie_data_to_text": 35.334, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 29.0484, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.1889, + "eval_f1_for_task769_qed_title_generation": 82.7863, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.5, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 37.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_f1_for_task957_e2e_data_to_text": 56.7074, + "eval_f1_for_task970_sherliic_textual_entailment": 45.0, + "eval_f1_for_textual_entailment": 39.2917, + "eval_f1_for_title_generation": 34.3055, + "eval_f1_for_word_analogy": 31.625, + "eval_gen_len": 10.3191, + "eval_global_step": 50, + "eval_loss": 1.0946542024612427, + "eval_rouge1": 48.0335, + "eval_rouge1_for_answerability_classification": 52.4103, + "eval_rouge1_for_cause_effect_classification": 56.8007, + "eval_rouge1_for_coreference_resolution": 46.1912, + "eval_rouge1_for_data_to_text": 56.8368, + "eval_rouge1_for_dialogue_act_recognition": 53.7551, + "eval_rouge1_for_grammar_error_correction": 61.924, + "eval_rouge1_for_keyword_tagging": 60.029, + "eval_rouge1_for_overlap_extraction": 38.4496, + "eval_rouge1_for_question_rewriting": 68.2683, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 46.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 72.5731, + "eval_rouge1_for_task035_winogrande_question_rewriting": 84.4531, + "eval_rouge1_for_task036_qasc_keyword_tagging": 77.8833, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 69.8629, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 25.0, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 46.0, + "eval_rouge1_for_task1157_bard_word_analogy": 55.0, + "eval_rouge1_for_task1158_bard_word_analogy": 23.0, + "eval_rouge1_for_task1159_bard_word_analogy": 17.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 32.7158, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.9537, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.3883, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.9195, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.8293, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.864, + "eval_rouge1_for_task1358_xlsum_title_generation": 42.0183, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.2857, + "eval_rouge1_for_task1407_dart_data_to_text": 41.8804, + "eval_rouge1_for_task1409_dart_data_to_text": 54.0022, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9918, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.6926, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.8562, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.3238, + "eval_rouge1_for_task1586_scifact_title_generation": 35.0911, + "eval_rouge1_for_task1598_nyc_data_to_text": 50.5792, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.6169, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.2173, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.2031, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.9524, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.5793, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 27.1041, + "eval_rouge1_for_task220_rocstories_title_generation": 75.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.05, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.5658, + "eval_rouge1_for_task288_gigaword_title_generation": 35.0351, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.019, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.3793, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.8019, + "eval_rouge1_for_task418_persent_title_generation": 28.752, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.8318, + "eval_rouge1_for_task500_scruples_title_generation": 20.4716, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.7422, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.9778, + "eval_rouge1_for_task602_wikitext_title_generation": 13.8177, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 47.5587, + "eval_rouge1_for_task619_ohsumed_title_generation": 41.0394, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.3, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.1286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 28.6556, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.8098, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.3699, + "eval_rouge1_for_task677_ollie_data_to_text": 37.9904, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.5236, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.6401, + "eval_rouge1_for_task769_qed_title_generation": 83.1529, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.3333, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rouge1_for_task957_e2e_data_to_text": 58.374, + "eval_rouge1_for_task970_sherliic_textual_entailment": 45.0, + "eval_rouge1_for_textual_entailment": 41.1528, + "eval_rouge1_for_title_generation": 36.8223, + "eval_rouge1_for_word_analogy": 31.625, + "eval_rougeL": 46.5269, + "eval_rougeL_for_answerability_classification": 52.4103, + "eval_rougeL_for_cause_effect_classification": 55.9625, + "eval_rougeL_for_coreference_resolution": 46.1912, + "eval_rougeL_for_data_to_text": 48.4114, + "eval_rougeL_for_dialogue_act_recognition": 53.7551, + "eval_rougeL_for_grammar_error_correction": 61.2403, + "eval_rougeL_for_keyword_tagging": 59.5624, + "eval_rougeL_for_overlap_extraction": 37.8964, + "eval_rougeL_for_question_rewriting": 64.1442, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 46.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 72.4352, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.9875, + "eval_rougeL_for_task036_qasc_keyword_tagging": 77.55, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 59.682, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 25.0, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 46.0, + "eval_rougeL_for_task1157_bard_word_analogy": 55.0, + "eval_rougeL_for_task1158_bard_word_analogy": 23.0, + "eval_rougeL_for_task1159_bard_word_analogy": 17.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.4434, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.0012, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.9653, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.3696, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.1878, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.9765, + "eval_rougeL_for_task1358_xlsum_title_generation": 35.0009, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.2857, + "eval_rougeL_for_task1407_dart_data_to_text": 33.4385, + "eval_rougeL_for_task1409_dart_data_to_text": 44.023, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.4892, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.0262, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9913, + "eval_rougeL_for_task1562_zest_question_rewriting": 47.4892, + "eval_rougeL_for_task1586_scifact_title_generation": 27.9526, + "eval_rougeL_for_task1598_nyc_data_to_text": 37.8924, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.8567, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0071, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.7868, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.9524, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.9496, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 27.1041, + "eval_rougeL_for_task220_rocstories_title_generation": 75.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.05, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.4595, + "eval_rougeL_for_task288_gigaword_title_generation": 30.3075, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.019, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.766, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.5633, + "eval_rougeL_for_task418_persent_title_generation": 24.6613, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.3295, + "eval_rougeL_for_task500_scruples_title_generation": 18.7792, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.3418, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.4078, + "eval_rougeL_for_task602_wikitext_title_generation": 13.8177, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.3048, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.784, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.7, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.7286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 28.6556, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.5592, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.2108, + "eval_rougeL_for_task677_ollie_data_to_text": 30.4476, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.4044, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.5213, + "eval_rougeL_for_task769_qed_title_generation": 83.1529, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.3333, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.7428, + "eval_rougeL_for_task970_sherliic_textual_entailment": 45.0, + "eval_rougeL_for_textual_entailment": 41.1528, + "eval_rougeL_for_title_generation": 33.8064, + "eval_rougeL_for_word_analogy": 31.625, + "eval_runtime": 1035.4211, + "eval_samples_per_second": 11.503, + "eval_steps_per_second": 0.72, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 1.2952, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 27.8002, + "eval_exact_match_for_answerability_classification": 42.3846, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 36.5714, + "eval_exact_match_for_data_to_text": 6.7797, + "eval_exact_match_for_dialogue_act_recognition": 46.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 38.8, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 43.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 38.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 20.0, + "eval_exact_match_for_task1153_bard_word_analogy": 21.0, + "eval_exact_match_for_task1154_bard_word_analogy": 17.0, + "eval_exact_match_for_task1155_bard_word_analogy": 63.0, + "eval_exact_match_for_task1156_bard_word_analogy": 36.0, + "eval_exact_match_for_task1157_bard_word_analogy": 51.0, + "eval_exact_match_for_task1158_bard_word_analogy": 18.0, + "eval_exact_match_for_task1159_bard_word_analogy": 17.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 40.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 4.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 40.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 27.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 49.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 12.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 2.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 16.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 12.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 81.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 7.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.5, + "eval_exact_match_for_title_generation": 9.6973, + "eval_exact_match_for_word_analogy": 30.375, + "eval_f1": 44.6228, + "eval_f1_for_answerability_classification": 44.8974, + "eval_f1_for_cause_effect_classification": 54.8739, + "eval_f1_for_coreference_resolution": 42.1175, + "eval_f1_for_data_to_text": 54.0899, + "eval_f1_for_dialogue_act_recognition": 50.3571, + "eval_f1_for_grammar_error_correction": 56.8646, + "eval_f1_for_keyword_tagging": 50.5376, + "eval_f1_for_overlap_extraction": 34.0076, + "eval_f1_for_question_rewriting": 68.4843, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 44.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 73.8096, + "eval_f1_for_task035_winogrande_question_rewriting": 86.1814, + "eval_f1_for_task036_qasc_keyword_tagging": 66.031, + "eval_f1_for_task039_qasc_overlap_extraction": 28.3333, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 55.65, + "eval_f1_for_task1152_bard_word_analogy": 20.0, + "eval_f1_for_task1153_bard_word_analogy": 21.0, + "eval_f1_for_task1154_bard_word_analogy": 17.0, + "eval_f1_for_task1155_bard_word_analogy": 63.0, + "eval_f1_for_task1156_bard_word_analogy": 36.6667, + "eval_f1_for_task1157_bard_word_analogy": 51.0, + "eval_f1_for_task1158_bard_word_analogy": 18.0, + "eval_f1_for_task1159_bard_word_analogy": 17.0, + "eval_f1_for_task1161_coda_19_title_generation": 26.0095, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.9955, + "eval_f1_for_task121_atomic_question_rewriting": 50.9824, + "eval_f1_for_task133_winowhy_coreference_resolution": 40.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0786, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.2366, + "eval_f1_for_task1356_xlsum_title_generation": 13.5849, + "eval_f1_for_task1358_xlsum_title_generation": 35.3944, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 54.0, + "eval_f1_for_task1407_dart_data_to_text": 42.8334, + "eval_f1_for_task1409_dart_data_to_text": 51.4929, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.979, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 35.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.4504, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7502, + "eval_f1_for_task1562_zest_question_rewriting": 51.8576, + "eval_f1_for_task1586_scifact_title_generation": 28.3939, + "eval_f1_for_task1598_nyc_data_to_text": 52.4526, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 40.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5016, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.0537, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 37.1482, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.4159, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.1463, + "eval_f1_for_task190_snli_textual_entailment": 49.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 24.9079, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 2.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.3048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 39.6819, + "eval_f1_for_task288_gigaword_title_generation": 30.1867, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 66.2524, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.9156, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 17.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 82.5101, + "eval_f1_for_task418_persent_title_generation": 23.4526, + "eval_f1_for_task442_com_qa_question_rewriting": 71.2501, + "eval_f1_for_task500_scruples_title_generation": 14.3921, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.3334, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 45.3202, + "eval_f1_for_task602_wikitext_title_generation": 16.1325, + "eval_f1_for_task613_liar_keyword_tagging": 13.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 37.535, + "eval_f1_for_task619_ohsumed_title_generation": 40.1032, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.8, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 32.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 89.5238, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 1.7524, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.4903, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.5126, + "eval_f1_for_task677_ollie_data_to_text": 34.8555, + "eval_f1_for_task738_perspectrum_textual_entailment": 7.0, + "eval_f1_for_task743_eurlex_title_generation": 27.9732, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.6103, + "eval_f1_for_task769_qed_title_generation": 78.1585, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_f1_for_task890_gwsd_textual_entailment": 44.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.919, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 48.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 56.8398, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.5, + "eval_f1_for_title_generation": 32.4237, + "eval_f1_for_word_analogy": 30.4583, + "eval_gen_len": 10.7377, + "eval_global_step": 100, + "eval_loss": 1.0964491367340088, + "eval_rouge1": 46.6235, + "eval_rouge1_for_answerability_classification": 44.8974, + "eval_rouge1_for_cause_effect_classification": 55.9809, + "eval_rouge1_for_coreference_resolution": 42.8074, + "eval_rouge1_for_data_to_text": 57.3528, + "eval_rouge1_for_dialogue_act_recognition": 51.6313, + "eval_rouge1_for_grammar_error_correction": 61.8388, + "eval_rouge1_for_keyword_tagging": 55.57, + "eval_rouge1_for_overlap_extraction": 36.388, + "eval_rouge1_for_question_rewriting": 70.1195, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.1667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 73.8198, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.9038, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.25, + "eval_rouge1_for_task039_qasc_overlap_extraction": 32.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 69.8998, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 21.0, + "eval_rouge1_for_task1154_bard_word_analogy": 17.0, + "eval_rouge1_for_task1155_bard_word_analogy": 63.0, + "eval_rouge1_for_task1156_bard_word_analogy": 36.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 51.0, + "eval_rouge1_for_task1158_bard_word_analogy": 18.0, + "eval_rouge1_for_task1159_bard_word_analogy": 17.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 29.6399, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2653, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.2166, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 40.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5257, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.7105, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.6282, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.3165, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 55.5857, + "eval_rouge1_for_task1407_dart_data_to_text": 44.7196, + "eval_rouge1_for_task1409_dart_data_to_text": 52.8526, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.7038, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.0099, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9738, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.9184, + "eval_rouge1_for_task1586_scifact_title_generation": 31.8055, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.1983, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 80.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.8334, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.2754, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.8318, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.4159, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.7108, + "eval_rouge1_for_task190_snli_textual_entailment": 49.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 29.8651, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 2.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.15, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 40.4427, + "eval_rouge1_for_task288_gigaword_title_generation": 32.3759, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 66.7524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.393, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 23.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 85.0657, + "eval_rouge1_for_task418_persent_title_generation": 26.0799, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6724, + "eval_rouge1_for_task500_scruples_title_generation": 15.6704, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.9497, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 46.5651, + "eval_rouge1_for_task602_wikitext_title_generation": 16.6334, + "eval_rouge1_for_task613_liar_keyword_tagging": 26.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.8068, + "eval_rouge1_for_task619_ohsumed_title_generation": 43.8459, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.4333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 90.5, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 1.7333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.2163, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.6918, + "eval_rouge1_for_task677_ollie_data_to_text": 37.9316, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rouge1_for_task743_eurlex_title_generation": 29.5426, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.8599, + "eval_rouge1_for_task769_qed_title_generation": 77.9585, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.7524, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 58.6221, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 43.1667, + "eval_rouge1_for_title_generation": 34.5618, + "eval_rouge1_for_word_analogy": 30.5833, + "eval_rougeL": 45.1592, + "eval_rougeL_for_answerability_classification": 44.8974, + "eval_rougeL_for_cause_effect_classification": 55.0944, + "eval_rougeL_for_coreference_resolution": 42.8074, + "eval_rougeL_for_data_to_text": 48.8064, + "eval_rougeL_for_dialogue_act_recognition": 51.6313, + "eval_rougeL_for_grammar_error_correction": 61.0773, + "eval_rougeL_for_keyword_tagging": 55.1167, + "eval_rougeL_for_overlap_extraction": 35.9003, + "eval_rougeL_for_question_rewriting": 66.2438, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.1667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 73.8198, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.8189, + "eval_rougeL_for_task036_qasc_keyword_tagging": 70.6833, + "eval_rougeL_for_task039_qasc_overlap_extraction": 32.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.4588, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 21.0, + "eval_rougeL_for_task1154_bard_word_analogy": 17.0, + "eval_rougeL_for_task1155_bard_word_analogy": 63.0, + "eval_rougeL_for_task1156_bard_word_analogy": 36.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 51.0, + "eval_rougeL_for_task1158_bard_word_analogy": 18.0, + "eval_rougeL_for_task1159_bard_word_analogy": 17.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 23.5262, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9782, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.7308, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 40.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.9758, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.4191, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.624, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.5237, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 55.5857, + "eval_rougeL_for_task1407_dart_data_to_text": 35.374, + "eval_rougeL_for_task1409_dart_data_to_text": 44.2283, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.2046, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 35.4738, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.95, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.1408, + "eval_rougeL_for_task1586_scifact_title_generation": 25.3767, + "eval_rougeL_for_task1598_nyc_data_to_text": 43.4539, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 80.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.247, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.9897, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.672, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.4159, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.2437, + "eval_rougeL_for_task190_snli_textual_entailment": 49.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 29.8651, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 2.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.15, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.4673, + "eval_rougeL_for_task288_gigaword_title_generation": 28.252, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 66.7524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.8677, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 23.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.1395, + "eval_rougeL_for_task418_persent_title_generation": 22.5188, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8442, + "eval_rougeL_for_task500_scruples_title_generation": 14.8141, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.3626, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 44.3324, + "eval_rougeL_for_task602_wikitext_title_generation": 16.6334, + "eval_rougeL_for_task613_liar_keyword_tagging": 26.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 40.1262, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.4738, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.1333, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.1, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 1.7333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.2873, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.2564, + "eval_rougeL_for_task677_ollie_data_to_text": 30.4698, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.9057, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.0347, + "eval_rougeL_for_task769_qed_title_generation": 77.7085, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.7524, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 45.614, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 43.1667, + "eval_rougeL_for_title_generation": 31.7476, + "eval_rougeL_for_word_analogy": 30.5833, + "eval_runtime": 1083.9815, + "eval_samples_per_second": 10.987, + "eval_steps_per_second": 0.687, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 1.2831, + "step": 200 + }, + { + "epoch": 0.04, + "eval_exact_match": 30.6465, + "eval_exact_match_for_answerability_classification": 50.3846, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 41.0714, + "eval_exact_match_for_data_to_text": 8.2324, + "eval_exact_match_for_dialogue_act_recognition": 45.7143, + "eval_exact_match_for_grammar_error_correction": 6.0, + "eval_exact_match_for_keyword_tagging": 40.4, + "eval_exact_match_for_overlap_extraction": 10.5, + "eval_exact_match_for_question_rewriting": 4.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 54.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 23.0, + "eval_exact_match_for_task1153_bard_word_analogy": 26.0, + "eval_exact_match_for_task1154_bard_word_analogy": 19.0, + "eval_exact_match_for_task1155_bard_word_analogy": 69.0, + "eval_exact_match_for_task1156_bard_word_analogy": 43.0, + "eval_exact_match_for_task1157_bard_word_analogy": 66.0, + "eval_exact_match_for_task1158_bard_word_analogy": 39.0, + "eval_exact_match_for_task1159_bard_word_analogy": 30.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 22.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 24.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 41.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 45.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 43.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 12.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 17.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 60.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 35.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 19.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 52.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, + "eval_exact_match_for_task219_rocstories_title_generation": 11.0, + "eval_exact_match_for_task220_rocstories_title_generation": 54.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 58.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 37.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 60.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 13.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 48.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 29.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 80.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 52.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 60.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 67.0, + "eval_exact_match_for_textual_entailment": 42.9167, + "eval_exact_match_for_title_generation": 8.7444, + "eval_exact_match_for_word_analogy": 39.375, + "eval_f1": 46.9066, + "eval_f1_for_answerability_classification": 52.9487, + "eval_f1_for_cause_effect_classification": 55.3795, + "eval_f1_for_coreference_resolution": 47.6416, + "eval_f1_for_data_to_text": 53.7237, + "eval_f1_for_dialogue_act_recognition": 49.7857, + "eval_f1_for_grammar_error_correction": 54.7354, + "eval_f1_for_keyword_tagging": 52.5141, + "eval_f1_for_overlap_extraction": 30.681, + "eval_f1_for_question_rewriting": 62.979, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task034_winogrande_question_rewriting": 30.8391, + "eval_f1_for_task035_winogrande_question_rewriting": 83.4332, + "eval_f1_for_task036_qasc_keyword_tagging": 64.4073, + "eval_f1_for_task039_qasc_overlap_extraction": 26.3333, + "eval_f1_for_task050_multirc_answerability_classification": 54.0, + "eval_f1_for_task102_commongen_data_to_text": 53.4101, + "eval_f1_for_task1152_bard_word_analogy": 23.0, + "eval_f1_for_task1153_bard_word_analogy": 26.0, + "eval_f1_for_task1154_bard_word_analogy": 19.0, + "eval_f1_for_task1155_bard_word_analogy": 69.0, + "eval_f1_for_task1156_bard_word_analogy": 43.0, + "eval_f1_for_task1157_bard_word_analogy": 66.0, + "eval_f1_for_task1158_bard_word_analogy": 39.0, + "eval_f1_for_task1159_bard_word_analogy": 30.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.0348, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.4147, + "eval_f1_for_task121_atomic_question_rewriting": 51.7155, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.3404, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.3548, + "eval_f1_for_task1356_xlsum_title_generation": 15.5346, + "eval_f1_for_task1358_xlsum_title_generation": 32.943, + "eval_f1_for_task1385_anli_textual_entailment": 24.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 41.0, + "eval_f1_for_task1388_cb_textual_entailment": 45.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 43.2767, + "eval_f1_for_task1409_dart_data_to_text": 52.1074, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.2503, + "eval_f1_for_task1439_doqa_answerability_classification": 43.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 35.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 32.7426, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 80.2205, + "eval_f1_for_task1562_zest_question_rewriting": 50.2497, + "eval_f1_for_task1586_scifact_title_generation": 29.8392, + "eval_f1_for_task1598_nyc_data_to_text": 50.5065, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 46.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.6669, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.9647, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 36.5227, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.5714, + "eval_f1_for_task1728_web_nlg_data_to_text": 55.7374, + "eval_f1_for_task190_snli_textual_entailment": 19.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 52.0, + "eval_f1_for_task201_multinli_textual_entailment": 35.0, + "eval_f1_for_task202_multinli_textual_entailment": 24.0, + "eval_f1_for_task219_rocstories_title_generation": 29.5159, + "eval_f1_for_task220_rocstories_title_generation": 54.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 58.0, + "eval_f1_for_task233_iirc_answerability_classification": 37.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 60.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.9333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.0287, + "eval_f1_for_task288_gigaword_title_generation": 26.7279, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 34.3333, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 69.8778, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.4216, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 80.4987, + "eval_f1_for_task418_persent_title_generation": 21.0078, + "eval_f1_for_task442_com_qa_question_rewriting": 68.4025, + "eval_f1_for_task500_scruples_title_generation": 18.2181, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 44.6618, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.9971, + "eval_f1_for_task602_wikitext_title_generation": 12.7204, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 36.9018, + "eval_f1_for_task619_ohsumed_title_generation": 40.4833, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.7396, + "eval_f1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 32.0, + "eval_f1_for_task642_e_snli_textual_entailment": 43.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 86.0905, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 73.8566, + "eval_f1_for_task671_ambigqa_question_rewriting": 54.3376, + "eval_f1_for_task677_ollie_data_to_text": 35.2752, + "eval_f1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_f1_for_task743_eurlex_title_generation": 29.6274, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.2315, + "eval_f1_for_task769_qed_title_generation": 73.2963, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 52.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 61.6, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 51.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task957_e2e_data_to_text": 57.8993, + "eval_f1_for_task970_sherliic_textual_entailment": 67.0, + "eval_f1_for_textual_entailment": 42.9167, + "eval_f1_for_title_generation": 32.5212, + "eval_f1_for_word_analogy": 39.375, + "eval_gen_len": 10.2397, + "eval_global_step": 200, + "eval_loss": 1.0702121257781982, + "eval_rouge1": 48.6396, + "eval_rouge1_for_answerability_classification": 52.9487, + "eval_rouge1_for_cause_effect_classification": 56.4862, + "eval_rouge1_for_coreference_resolution": 48.466, + "eval_rouge1_for_data_to_text": 56.5999, + "eval_rouge1_for_dialogue_act_recognition": 53.3286, + "eval_rouge1_for_grammar_error_correction": 59.8279, + "eval_rouge1_for_keyword_tagging": 57.4537, + "eval_rouge1_for_overlap_extraction": 33.6289, + "eval_rouge1_for_question_rewriting": 64.7618, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 30.8973, + "eval_rouge1_for_task035_winogrande_question_rewriting": 84.0663, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.7861, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 54.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.0293, + "eval_rouge1_for_task1152_bard_word_analogy": 23.0, + "eval_rouge1_for_task1153_bard_word_analogy": 26.0, + "eval_rouge1_for_task1154_bard_word_analogy": 19.0, + "eval_rouge1_for_task1155_bard_word_analogy": 69.0, + "eval_rouge1_for_task1156_bard_word_analogy": 43.0, + "eval_rouge1_for_task1157_bard_word_analogy": 66.0, + "eval_rouge1_for_task1158_bard_word_analogy": 39.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.4257, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.0086, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.9791, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.7797, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.1923, + "eval_rouge1_for_task1356_xlsum_title_generation": 18.1996, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.654, + "eval_rouge1_for_task1385_anli_textual_entailment": 24.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 45.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 74.8, + "eval_rouge1_for_task1407_dart_data_to_text": 44.4255, + "eval_rouge1_for_task1409_dart_data_to_text": 53.5381, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.4676, + "eval_rouge1_for_task1439_doqa_answerability_classification": 43.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 35.0534, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 83.1882, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.6914, + "eval_rouge1_for_task1586_scifact_title_generation": 33.3433, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.948, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.0529, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.125, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.5171, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.5714, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.5233, + "eval_rouge1_for_task190_snli_textual_entailment": 19.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 52.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, + "eval_rouge1_for_task219_rocstories_title_generation": 34.562, + "eval_rouge1_for_task220_rocstories_title_generation": 54.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 58.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 37.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 60.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.5524, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.9246, + "eval_rouge1_for_task288_gigaword_title_generation": 29.0718, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 35.8, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 69.8333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.1363, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 83.4559, + "eval_rouge1_for_task418_persent_title_generation": 23.7489, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.6548, + "eval_rouge1_for_task500_scruples_title_generation": 20.025, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 45.3612, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.424, + "eval_rouge1_for_task602_wikitext_title_generation": 13.7451, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.9667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 43.9335, + "eval_rouge1_for_task619_ohsumed_title_generation": 43.2663, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.6396, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.8762, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.038, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 55.343, + "eval_rouge1_for_task677_ollie_data_to_text": 37.5787, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.8368, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.2913, + "eval_rouge1_for_task769_qed_title_generation": 73.5271, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 61.4333, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task957_e2e_data_to_text": 59.7514, + "eval_rouge1_for_task970_sherliic_textual_entailment": 67.0, + "eval_rouge1_for_textual_entailment": 44.625, + "eval_rouge1_for_title_generation": 34.6044, + "eval_rouge1_for_word_analogy": 39.375, + "eval_rougeL": 47.1494, + "eval_rougeL_for_answerability_classification": 52.9487, + "eval_rougeL_for_cause_effect_classification": 55.6277, + "eval_rougeL_for_coreference_resolution": 48.466, + "eval_rougeL_for_data_to_text": 48.5963, + "eval_rougeL_for_dialogue_act_recognition": 53.3286, + "eval_rougeL_for_grammar_error_correction": 58.8474, + "eval_rougeL_for_keyword_tagging": 56.9404, + "eval_rougeL_for_overlap_extraction": 32.9263, + "eval_rougeL_for_question_rewriting": 60.6373, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 30.8973, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.9356, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.2195, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 54.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.0518, + "eval_rougeL_for_task1152_bard_word_analogy": 23.0, + "eval_rougeL_for_task1153_bard_word_analogy": 26.0, + "eval_rougeL_for_task1154_bard_word_analogy": 19.0, + "eval_rougeL_for_task1155_bard_word_analogy": 69.0, + "eval_rougeL_for_task1156_bard_word_analogy": 43.0, + "eval_rougeL_for_task1157_bard_word_analogy": 66.0, + "eval_rougeL_for_task1158_bard_word_analogy": 39.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.7479, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.1404, + "eval_rougeL_for_task121_atomic_question_rewriting": 49.1722, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.9741, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.9649, + "eval_rougeL_for_task1356_xlsum_title_generation": 15.5253, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.9437, + "eval_rougeL_for_task1385_anli_textual_entailment": 24.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 45.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 74.8, + "eval_rougeL_for_task1407_dart_data_to_text": 36.258, + "eval_rougeL_for_task1409_dart_data_to_text": 44.9233, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.6587, + "eval_rougeL_for_task1439_doqa_answerability_classification": 43.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 31.5264, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 82.036, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.7225, + "eval_rougeL_for_task1586_scifact_title_generation": 26.7789, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.5241, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.1764, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0341, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 32.7873, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.5714, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.144, + "eval_rougeL_for_task190_snli_textual_entailment": 19.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 52.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, + "eval_rougeL_for_task219_rocstories_title_generation": 33.8286, + "eval_rougeL_for_task220_rocstories_title_generation": 54.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 58.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 37.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 60.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.5524, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.5193, + "eval_rougeL_for_task288_gigaword_title_generation": 24.7312, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 35.8, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 69.8333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 71.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 82.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.7852, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.3873, + "eval_rougeL_for_task418_persent_title_generation": 20.3789, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.3368, + "eval_rougeL_for_task500_scruples_title_generation": 18.5266, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 44.9589, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.3367, + "eval_rougeL_for_task602_wikitext_title_generation": 13.7451, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.9667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 39.2753, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.2661, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.6396, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 86.8762, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.0916, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 53.1859, + "eval_rougeL_for_task677_ollie_data_to_text": 30.2724, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.1215, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.2153, + "eval_rougeL_for_task769_qed_title_generation": 73.3049, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 61.4333, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task957_e2e_data_to_text": 45.6216, + "eval_rougeL_for_task970_sherliic_textual_entailment": 67.0, + "eval_rougeL_for_textual_entailment": 44.625, + "eval_rougeL_for_title_generation": 31.5742, + "eval_rougeL_for_word_analogy": 39.375, + "eval_runtime": 1041.1431, + "eval_samples_per_second": 11.439, + "eval_steps_per_second": 0.716, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.2014, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 30.3862, + "eval_exact_match_for_answerability_classification": 50.0769, + "eval_exact_match_for_cause_effect_classification": 38.2857, + "eval_exact_match_for_coreference_resolution": 40.5714, + "eval_exact_match_for_data_to_text": 6.9007, + "eval_exact_match_for_dialogue_act_recognition": 47.1429, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 39.0, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 2.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 22.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 67.0, + "eval_exact_match_for_task1156_bard_word_analogy": 41.0, + "eval_exact_match_for_task1157_bard_word_analogy": 54.0, + "eval_exact_match_for_task1158_bard_word_analogy": 42.0, + "eval_exact_match_for_task1159_bard_word_analogy": 34.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 55.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 51.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 32.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 35.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, + "eval_exact_match_for_task219_rocstories_title_generation": 10.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 7.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 13.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 9.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 76.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 45.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, + "eval_exact_match_for_textual_entailment": 41.875, + "eval_exact_match_for_title_generation": 9.6413, + "eval_exact_match_for_word_analogy": 39.0, + "eval_f1": 47.1842, + "eval_f1_for_answerability_classification": 52.641, + "eval_f1_for_cause_effect_classification": 55.2784, + "eval_f1_for_coreference_resolution": 46.0074, + "eval_f1_for_data_to_text": 53.0392, + "eval_f1_for_dialogue_act_recognition": 50.5, + "eval_f1_for_grammar_error_correction": 57.3021, + "eval_f1_for_keyword_tagging": 53.5156, + "eval_f1_for_overlap_extraction": 33.5263, + "eval_f1_for_question_rewriting": 65.1695, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 48.0, + "eval_f1_for_task034_winogrande_question_rewriting": 46.3788, + "eval_f1_for_task035_winogrande_question_rewriting": 82.9596, + "eval_f1_for_task036_qasc_keyword_tagging": 63.7492, + "eval_f1_for_task039_qasc_overlap_extraction": 25.8333, + "eval_f1_for_task050_multirc_answerability_classification": 52.0, + "eval_f1_for_task102_commongen_data_to_text": 54.4506, + "eval_f1_for_task1152_bard_word_analogy": 22.0, + "eval_f1_for_task1153_bard_word_analogy": 28.6667, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 67.0, + "eval_f1_for_task1156_bard_word_analogy": 42.3333, + "eval_f1_for_task1157_bard_word_analogy": 54.0, + "eval_f1_for_task1158_bard_word_analogy": 42.0, + "eval_f1_for_task1159_bard_word_analogy": 34.6667, + "eval_f1_for_task1161_coda_19_title_generation": 34.6758, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 77.6967, + "eval_f1_for_task121_atomic_question_rewriting": 51.0686, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.8882, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.3111, + "eval_f1_for_task1356_xlsum_title_generation": 19.4232, + "eval_f1_for_task1358_xlsum_title_generation": 36.105, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 21.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 55.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_f1_for_task1407_dart_data_to_text": 36.8875, + "eval_f1_for_task1409_dart_data_to_text": 49.7025, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8243, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.8587, + "eval_f1_for_task1554_scitail_textual_entailment": 51.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7799, + "eval_f1_for_task1562_zest_question_rewriting": 52.9146, + "eval_f1_for_task1586_scifact_title_generation": 36.4892, + "eval_f1_for_task1598_nyc_data_to_text": 50.7354, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.2265, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.2867, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 37.4581, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.5082, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.3273, + "eval_f1_for_task190_snli_textual_entailment": 35.0, + "eval_f1_for_task199_multinli_textual_entailment": 45.0, + "eval_f1_for_task200_multinli_textual_entailment": 40.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 24.0, + "eval_f1_for_task219_rocstories_title_generation": 24.297, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 65.8714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.2193, + "eval_f1_for_task288_gigaword_title_generation": 32.0841, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.7667, + "eval_f1_for_task329_gap_coreference_resolution": 48.0, + "eval_f1_for_task330_gap_coreference_resolution": 68.8048, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 27.7289, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 34.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 81.6426, + "eval_f1_for_task418_persent_title_generation": 25.7146, + "eval_f1_for_task442_com_qa_question_rewriting": 70.3664, + "eval_f1_for_task500_scruples_title_generation": 18.6027, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.947, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.9346, + "eval_f1_for_task602_wikitext_title_generation": 14.196, + "eval_f1_for_task613_liar_keyword_tagging": 19.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 31.2198, + "eval_f1_for_task619_ohsumed_title_generation": 44.5412, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.8667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.6286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 73.532, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.7676, + "eval_f1_for_task677_ollie_data_to_text": 36.3201, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 32.6639, + "eval_f1_for_task760_msr_sqa_data_to_text": 1.4842, + "eval_f1_for_task769_qed_title_generation": 83.272, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_f1_for_task890_gwsd_textual_entailment": 45.0, + "eval_f1_for_task891_gap_coreference_resolution": 62.9857, + "eval_f1_for_task892_gap_coreference_resolution": 32.0, + "eval_f1_for_task893_gap_coreference_resolution": 47.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task957_e2e_data_to_text": 59.0075, + "eval_f1_for_task970_sherliic_textual_entailment": 51.0, + "eval_f1_for_textual_entailment": 41.875, + "eval_f1_for_title_generation": 35.139, + "eval_f1_for_word_analogy": 39.3333, + "eval_gen_len": 9.2726, + "eval_global_step": 500, + "eval_loss": 1.0761061906814575, + "eval_rouge1": 49.1005, + "eval_rouge1_for_answerability_classification": 52.641, + "eval_rouge1_for_cause_effect_classification": 56.224, + "eval_rouge1_for_coreference_resolution": 46.6246, + "eval_rouge1_for_data_to_text": 55.9136, + "eval_rouge1_for_dialogue_act_recognition": 53.8408, + "eval_rouge1_for_grammar_error_correction": 62.2561, + "eval_rouge1_for_keyword_tagging": 58.361, + "eval_rouge1_for_overlap_extraction": 35.0683, + "eval_rouge1_for_question_rewriting": 66.8362, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.9, + "eval_rouge1_for_task034_winogrande_question_rewriting": 46.3492, + "eval_rouge1_for_task035_winogrande_question_rewriting": 83.5678, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.3048, + "eval_rouge1_for_task039_qasc_overlap_extraction": 27.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.921, + "eval_rouge1_for_task1152_bard_word_analogy": 22.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 67.0, + "eval_rouge1_for_task1156_bard_word_analogy": 42.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 54.0, + "eval_rouge1_for_task1158_bard_word_analogy": 42.0, + "eval_rouge1_for_task1159_bard_word_analogy": 35.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.577, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.2652, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.5211, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.534, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.2861, + "eval_rouge1_for_task1356_xlsum_title_generation": 22.1956, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.616, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 21.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 55.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.3857, + "eval_rouge1_for_task1407_dart_data_to_text": 37.8502, + "eval_rouge1_for_task1409_dart_data_to_text": 50.4409, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5394, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.2777, + "eval_rouge1_for_task1554_scitail_textual_entailment": 51.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9729, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.1003, + "eval_rouge1_for_task1586_scifact_title_generation": 40.2575, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.0557, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.5243, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.4244, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.6831, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.5082, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.7805, + "eval_rouge1_for_task190_snli_textual_entailment": 35.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, + "eval_rouge1_for_task219_rocstories_title_generation": 28.8414, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 66.7167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 42.4699, + "eval_rouge1_for_task288_gigaword_title_generation": 34.6334, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 12.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 68.7333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.6385, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 40.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.2289, + "eval_rouge1_for_task418_persent_title_generation": 29.4352, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.5598, + "eval_rouge1_for_task500_scruples_title_generation": 20.4533, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.5521, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.2995, + "eval_rouge1_for_task602_wikitext_title_generation": 15.3379, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.9296, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.2393, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.2857, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.4143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 11.8333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.5711, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.2241, + "eval_rouge1_for_task677_ollie_data_to_text": 39.021, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rouge1_for_task743_eurlex_title_generation": 34.1613, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.6797, + "eval_rouge1_for_task769_qed_title_generation": 83.2053, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 45.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 63.319, + "eval_rouge1_for_task892_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task957_e2e_data_to_text": 60.916, + "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, + "eval_rouge1_for_textual_entailment": 44.6806, + "eval_rouge1_for_title_generation": 37.4913, + "eval_rouge1_for_word_analogy": 39.4583, + "eval_rougeL": 47.5758, + "eval_rougeL_for_answerability_classification": 52.641, + "eval_rougeL_for_cause_effect_classification": 55.8309, + "eval_rougeL_for_coreference_resolution": 46.6246, + "eval_rougeL_for_data_to_text": 47.755, + "eval_rougeL_for_dialogue_act_recognition": 53.8408, + "eval_rougeL_for_grammar_error_correction": 61.5046, + "eval_rougeL_for_keyword_tagging": 57.6302, + "eval_rougeL_for_overlap_extraction": 34.358, + "eval_rougeL_for_question_rewriting": 62.5878, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.9, + "eval_rougeL_for_task034_winogrande_question_rewriting": 45.8954, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.5342, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.4508, + "eval_rougeL_for_task039_qasc_overlap_extraction": 27.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.0729, + "eval_rougeL_for_task1152_bard_word_analogy": 22.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 67.0, + "eval_rougeL_for_task1156_bard_word_analogy": 42.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 54.0, + "eval_rougeL_for_task1158_bard_word_analogy": 42.0, + "eval_rougeL_for_task1159_bard_word_analogy": 35.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.3051, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 76.6124, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.5986, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7934, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4309, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.3915, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.9547, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 21.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 55.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.3857, + "eval_rougeL_for_task1407_dart_data_to_text": 31.8496, + "eval_rougeL_for_task1409_dart_data_to_text": 41.9212, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9012, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.5284, + "eval_rougeL_for_task1554_scitail_textual_entailment": 51.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.108, + "eval_rougeL_for_task1562_zest_question_rewriting": 47.1551, + "eval_rougeL_for_task1586_scifact_title_generation": 34.0356, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.3924, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.3016, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.3204, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 34.161, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.5082, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.7518, + "eval_rougeL_for_task190_snli_textual_entailment": 35.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, + "eval_rougeL_for_task219_rocstories_title_generation": 28.8414, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 66.7167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.0493, + "eval_rougeL_for_task288_gigaword_title_generation": 30.2246, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 12.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 68.7333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.9882, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 40.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.2663, + "eval_rougeL_for_task418_persent_title_generation": 26.1959, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.4157, + "eval_rougeL_for_task500_scruples_title_generation": 19.2319, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.6929, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.7979, + "eval_rougeL_for_task602_wikitext_title_generation": 15.3379, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 34.8283, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.6641, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.4857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.4143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 11.8333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.4622, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.7939, + "eval_rougeL_for_task677_ollie_data_to_text": 31.6707, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rougeL_for_task743_eurlex_title_generation": 30.7891, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.6235, + "eval_rougeL_for_task769_qed_title_generation": 83.2053, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 45.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 63.319, + "eval_rougeL_for_task892_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.0555, + "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, + "eval_rougeL_for_textual_entailment": 44.6806, + "eval_rougeL_for_title_generation": 34.2319, + "eval_rougeL_for_word_analogy": 39.4583, + "eval_runtime": 888.0806, + "eval_samples_per_second": 13.411, + "eval_steps_per_second": 0.839, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 1.0919, + "step": 1000 + }, + { + "epoch": 0.22, + "eval_exact_match": 30.6885, + "eval_exact_match_for_answerability_classification": 53.4615, + "eval_exact_match_for_cause_effect_classification": 40.4286, + "eval_exact_match_for_coreference_resolution": 44.2143, + "eval_exact_match_for_data_to_text": 7.9903, + "eval_exact_match_for_dialogue_act_recognition": 43.0, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 43.8, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 2.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 27.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 30.0, + "eval_exact_match_for_task1153_bard_word_analogy": 25.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 67.0, + "eval_exact_match_for_task1156_bard_word_analogy": 38.0, + "eval_exact_match_for_task1157_bard_word_analogy": 55.0, + "eval_exact_match_for_task1158_bard_word_analogy": 40.0, + "eval_exact_match_for_task1159_bard_word_analogy": 32.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 65.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 66.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 38.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 33.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 54.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 56.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 34.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 8.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 41.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 66.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 17.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 61.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 58.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 57.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 84.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 75.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 13.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 47.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 57.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 67.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 49.0, + "eval_exact_match_for_textual_entailment": 37.3333, + "eval_exact_match_for_title_generation": 10.7623, + "eval_exact_match_for_word_analogy": 38.75, + "eval_f1": 47.0903, + "eval_f1_for_answerability_classification": 56.0256, + "eval_f1_for_cause_effect_classification": 60.4689, + "eval_f1_for_coreference_resolution": 50.6486, + "eval_f1_for_data_to_text": 51.6378, + "eval_f1_for_dialogue_act_recognition": 46.5714, + "eval_f1_for_grammar_error_correction": 57.0915, + "eval_f1_for_keyword_tagging": 58.0512, + "eval_f1_for_overlap_extraction": 39.9381, + "eval_f1_for_question_rewriting": 58.6965, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 45.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 10.7637, + "eval_f1_for_task035_winogrande_question_rewriting": 88.2127, + "eval_f1_for_task036_qasc_keyword_tagging": 56.285, + "eval_f1_for_task039_qasc_overlap_extraction": 34.1667, + "eval_f1_for_task050_multirc_answerability_classification": 61.0, + "eval_f1_for_task102_commongen_data_to_text": 51.6101, + "eval_f1_for_task1152_bard_word_analogy": 30.0, + "eval_f1_for_task1153_bard_word_analogy": 25.0, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 67.0, + "eval_f1_for_task1156_bard_word_analogy": 38.0, + "eval_f1_for_task1157_bard_word_analogy": 55.0, + "eval_f1_for_task1158_bard_word_analogy": 40.0, + "eval_f1_for_task1159_bard_word_analogy": 32.0, + "eval_f1_for_task1161_coda_19_title_generation": 35.9366, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 76.3385, + "eval_f1_for_task121_atomic_question_rewriting": 48.9669, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.379, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.3019, + "eval_f1_for_task1356_xlsum_title_generation": 22.0104, + "eval_f1_for_task1358_xlsum_title_generation": 34.8875, + "eval_f1_for_task1385_anli_textual_entailment": 1.0, + "eval_f1_for_task1386_anli_textual_entailment": 1.0, + "eval_f1_for_task1387_anli_textual_entailment": 2.0, + "eval_f1_for_task1388_cb_textual_entailment": 24.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 65.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 66.0, + "eval_f1_for_task1407_dart_data_to_text": 29.3043, + "eval_f1_for_task1409_dart_data_to_text": 47.906, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6561, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 38.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 33.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.9441, + "eval_f1_for_task1554_scitail_textual_entailment": 52.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5269, + "eval_f1_for_task1562_zest_question_rewriting": 51.4579, + "eval_f1_for_task1586_scifact_title_generation": 32.9273, + "eval_f1_for_task1598_nyc_data_to_text": 52.7761, + "eval_f1_for_task1612_sick_textual_entailment": 54.0, + "eval_f1_for_task1615_sick_textual_entailment": 45.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.8382, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.7555, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 34.9777, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.619, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.3536, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 41.0, + "eval_f1_for_task200_multinli_textual_entailment": 66.0, + "eval_f1_for_task201_multinli_textual_entailment": 25.0, + "eval_f1_for_task202_multinli_textual_entailment": 17.0, + "eval_f1_for_task219_rocstories_title_generation": 21.8549, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 61.0, + "eval_f1_for_task233_iirc_answerability_classification": 43.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 58.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.3, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.7095, + "eval_f1_for_task288_gigaword_title_generation": 30.1573, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 31.7333, + "eval_f1_for_task329_gap_coreference_resolution": 54.0, + "eval_f1_for_task330_gap_coreference_resolution": 70.8952, + "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.1314, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 81.5085, + "eval_f1_for_task418_persent_title_generation": 24.202, + "eval_f1_for_task442_com_qa_question_rewriting": 70.5151, + "eval_f1_for_task500_scruples_title_generation": 21.5462, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.7976, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.7572, + "eval_f1_for_task602_wikitext_title_generation": 12.818, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 45.1507, + "eval_f1_for_task619_ohsumed_title_generation": 43.7408, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.3333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 84.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 43.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 90.3043, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.3653, + "eval_f1_for_task671_ambigqa_question_rewriting": 23.3932, + "eval_f1_for_task677_ollie_data_to_text": 32.8918, + "eval_f1_for_task738_perspectrum_textual_entailment": 47.0, + "eval_f1_for_task743_eurlex_title_generation": 35.8779, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.1913, + "eval_f1_for_task769_qed_title_generation": 78.4858, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 67.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 42.0, + "eval_f1_for_task891_gap_coreference_resolution": 65.5333, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 49.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 58.1008, + "eval_f1_for_task970_sherliic_textual_entailment": 49.0, + "eval_f1_for_textual_entailment": 37.3333, + "eval_f1_for_title_generation": 36.9534, + "eval_f1_for_word_analogy": 38.75, + "eval_gen_len": 9.8474, + "eval_global_step": 1000, + "eval_loss": 1.1213940382003784, + "eval_rouge1": 49.949, + "eval_rouge1_for_answerability_classification": 56.0256, + "eval_rouge1_for_cause_effect_classification": 61.463, + "eval_rouge1_for_coreference_resolution": 51.4316, + "eval_rouge1_for_data_to_text": 54.0756, + "eval_rouge1_for_dialogue_act_recognition": 50.7166, + "eval_rouge1_for_grammar_error_correction": 61.967, + "eval_rouge1_for_keyword_tagging": 62.9715, + "eval_rouge1_for_overlap_extraction": 43.7424, + "eval_rouge1_for_question_rewriting": 60.2724, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 45.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 10.7223, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.9041, + "eval_rouge1_for_task036_qasc_keyword_tagging": 62.5675, + "eval_rouge1_for_task039_qasc_overlap_extraction": 41.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.4387, + "eval_rouge1_for_task1152_bard_word_analogy": 30.0, + "eval_rouge1_for_task1153_bard_word_analogy": 26.0, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 67.0, + "eval_rouge1_for_task1156_bard_word_analogy": 38.0, + "eval_rouge1_for_task1157_bard_word_analogy": 55.0, + "eval_rouge1_for_task1158_bard_word_analogy": 40.0, + "eval_rouge1_for_task1159_bard_word_analogy": 32.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 39.3063, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 76.7595, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.4637, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.6657, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.3707, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.7831, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.6924, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 65.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 72.3492, + "eval_rouge1_for_task1407_dart_data_to_text": 29.4527, + "eval_rouge1_for_task1409_dart_data_to_text": 47.9519, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.2734, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 38.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.1621, + "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6606, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.1988, + "eval_rouge1_for_task1586_scifact_title_generation": 37.3005, + "eval_rouge1_for_task1598_nyc_data_to_text": 54.1017, + "eval_rouge1_for_task1612_sick_textual_entailment": 54.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.3568, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.781, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.1848, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.619, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.8459, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 41.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 66.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 25.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 17.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.3794, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 61.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 58.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 66.9667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.4848, + "eval_rouge1_for_task288_gigaword_title_generation": 32.8173, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 31.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 70.8238, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.8122, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 49.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.1944, + "eval_rouge1_for_task418_persent_title_generation": 27.6127, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.6349, + "eval_rouge1_for_task500_scruples_title_generation": 23.1421, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.0805, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.0765, + "eval_rouge1_for_task602_wikitext_title_generation": 13.383, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 51.4291, + "eval_rouge1_for_task619_ohsumed_title_generation": 47.1126, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.3667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 84.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.09, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.4, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.3655, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 24.0264, + "eval_rouge1_for_task677_ollie_data_to_text": 35.4735, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 72.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.3452, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2379, + "eval_rouge1_for_task769_qed_title_generation": 78.7859, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 67.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 65.8667, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 59.7767, + "eval_rouge1_for_task970_sherliic_textual_entailment": 49.0, + "eval_rouge1_for_textual_entailment": 44.5694, + "eval_rouge1_for_title_generation": 39.1642, + "eval_rouge1_for_word_analogy": 38.875, + "eval_rougeL": 48.554, + "eval_rougeL_for_answerability_classification": 56.0256, + "eval_rougeL_for_cause_effect_classification": 60.6589, + "eval_rougeL_for_coreference_resolution": 51.4316, + "eval_rougeL_for_data_to_text": 46.9266, + "eval_rougeL_for_dialogue_act_recognition": 50.7166, + "eval_rougeL_for_grammar_error_correction": 61.1608, + "eval_rougeL_for_keyword_tagging": 62.8619, + "eval_rougeL_for_overlap_extraction": 43.2583, + "eval_rougeL_for_question_rewriting": 56.255, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 45.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 10.7223, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.344, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.4197, + "eval_rougeL_for_task039_qasc_overlap_extraction": 41.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.6276, + "eval_rougeL_for_task1152_bard_word_analogy": 30.0, + "eval_rougeL_for_task1153_bard_word_analogy": 26.0, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 67.0, + "eval_rougeL_for_task1156_bard_word_analogy": 38.0, + "eval_rougeL_for_task1157_bard_word_analogy": 55.0, + "eval_rougeL_for_task1158_bard_word_analogy": 40.0, + "eval_rougeL_for_task1159_bard_word_analogy": 32.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 32.7293, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 75.1506, + "eval_rougeL_for_task121_atomic_question_rewriting": 44.9085, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.0252, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.525, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.6444, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.6945, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 65.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 72.3492, + "eval_rougeL_for_task1407_dart_data_to_text": 24.5924, + "eval_rougeL_for_task1409_dart_data_to_text": 41.2221, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.5259, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 38.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.6567, + "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7957, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.5434, + "eval_rougeL_for_task1586_scifact_title_generation": 30.1577, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.3406, + "eval_rougeL_for_task1612_sick_textual_entailment": 54.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.7249, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.5992, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.5664, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.619, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.7076, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 41.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 66.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 25.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 17.0, + "eval_rougeL_for_task219_rocstories_title_generation": 26.3794, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 61.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 58.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 66.9667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 45.5166, + "eval_rougeL_for_task288_gigaword_title_generation": 28.4488, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 31.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 70.8238, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.0277, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 49.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.8052, + "eval_rougeL_for_task418_persent_title_generation": 23.9172, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.037, + "eval_rougeL_for_task500_scruples_title_generation": 22.2317, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.5861, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.6098, + "eval_rougeL_for_task602_wikitext_title_generation": 13.1449, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 46.5842, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.9981, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.9667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 84.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 91.09, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.4, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.9324, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 22.1115, + "eval_rougeL_for_task677_ollie_data_to_text": 28.7504, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 72.0, + "eval_rougeL_for_task743_eurlex_title_generation": 33.0671, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0129, + "eval_rougeL_for_task769_qed_title_generation": 78.7859, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 67.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 65.8667, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 45.9902, + "eval_rougeL_for_task970_sherliic_textual_entailment": 49.0, + "eval_rougeL_for_textual_entailment": 44.5694, + "eval_rougeL_for_title_generation": 36.1289, + "eval_rougeL_for_word_analogy": 38.875, + "eval_runtime": 1011.0487, + "eval_samples_per_second": 11.78, + "eval_steps_per_second": 0.737, + "step": 1000 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 1.028, + "step": 1500 + }, + { + "epoch": 0.33, + "eval_exact_match": 30.9488, + "eval_exact_match_for_answerability_classification": 52.9231, + "eval_exact_match_for_cause_effect_classification": 40.8571, + "eval_exact_match_for_coreference_resolution": 42.0714, + "eval_exact_match_for_data_to_text": 7.2639, + "eval_exact_match_for_dialogue_act_recognition": 50.5714, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 49.0, + "eval_exact_match_for_overlap_extraction": 9.5, + "eval_exact_match_for_question_rewriting": 2.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 36.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 66.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 32.0, + "eval_exact_match_for_task1153_bard_word_analogy": 29.0, + "eval_exact_match_for_task1154_bard_word_analogy": 27.0, + "eval_exact_match_for_task1155_bard_word_analogy": 68.0, + "eval_exact_match_for_task1156_bard_word_analogy": 42.0, + "eval_exact_match_for_task1157_bard_word_analogy": 55.0, + "eval_exact_match_for_task1158_bard_word_analogy": 38.0, + "eval_exact_match_for_task1159_bard_word_analogy": 39.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 70.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 27.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 36.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 39.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 73.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 27.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 90.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 40.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 58.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 96.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 77.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 43.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 68.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 59.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 70.0, + "eval_exact_match_for_textual_entailment": 36.5833, + "eval_exact_match_for_title_generation": 10.8744, + "eval_exact_match_for_word_analogy": 41.25, + "eval_f1": 48.7971, + "eval_f1_for_answerability_classification": 55.4872, + "eval_f1_for_cause_effect_classification": 61.7299, + "eval_f1_for_coreference_resolution": 48.8048, + "eval_f1_for_data_to_text": 53.2762, + "eval_f1_for_dialogue_act_recognition": 54.1429, + "eval_f1_for_grammar_error_correction": 57.2086, + "eval_f1_for_keyword_tagging": 61.9694, + "eval_f1_for_overlap_extraction": 34.1371, + "eval_f1_for_question_rewriting": 70.3341, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 87.9518, + "eval_f1_for_task035_winogrande_question_rewriting": 89.0545, + "eval_f1_for_task036_qasc_keyword_tagging": 68.4762, + "eval_f1_for_task039_qasc_overlap_extraction": 22.6667, + "eval_f1_for_task050_multirc_answerability_classification": 66.0, + "eval_f1_for_task102_commongen_data_to_text": 55.5751, + "eval_f1_for_task1152_bard_word_analogy": 32.0, + "eval_f1_for_task1153_bard_word_analogy": 29.0, + "eval_f1_for_task1154_bard_word_analogy": 27.0, + "eval_f1_for_task1155_bard_word_analogy": 68.0, + "eval_f1_for_task1156_bard_word_analogy": 42.0, + "eval_f1_for_task1157_bard_word_analogy": 55.0, + "eval_f1_for_task1158_bard_word_analogy": 38.0, + "eval_f1_for_task1159_bard_word_analogy": 39.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.4165, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7669, + "eval_f1_for_task121_atomic_question_rewriting": 48.6877, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.0329, + "eval_f1_for_task1344_rte_textual_entailment": 54.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.5934, + "eval_f1_for_task1356_xlsum_title_generation": 23.6523, + "eval_f1_for_task1358_xlsum_title_generation": 33.3598, + "eval_f1_for_task1385_anli_textual_entailment": 0.0, + "eval_f1_for_task1386_anli_textual_entailment": 1.0, + "eval_f1_for_task1387_anli_textual_entailment": 0.0, + "eval_f1_for_task1388_cb_textual_entailment": 21.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 70.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 34.7959, + "eval_f1_for_task1409_dart_data_to_text": 47.6266, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8591, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 27.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1540_peer_read_title_generation": 39.8249, + "eval_f1_for_task1554_scitail_textual_entailment": 52.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5581, + "eval_f1_for_task1562_zest_question_rewriting": 48.9308, + "eval_f1_for_task1586_scifact_title_generation": 36.301, + "eval_f1_for_task1598_nyc_data_to_text": 51.8334, + "eval_f1_for_task1612_sick_textual_entailment": 36.0, + "eval_f1_for_task1615_sick_textual_entailment": 45.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5531, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.0887, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_f1_for_task1659_billsum_title_generation": 38.9582, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.1714, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.4539, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 36.0, + "eval_f1_for_task200_multinli_textual_entailment": 73.0, + "eval_f1_for_task201_multinli_textual_entailment": 27.0, + "eval_f1_for_task202_multinli_textual_entailment": 24.0, + "eval_f1_for_task219_rocstories_title_generation": 21.9445, + "eval_f1_for_task220_rocstories_title_generation": 90.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 40.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 58.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.7333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.6075, + "eval_f1_for_task288_gigaword_title_generation": 29.6137, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 39.0667, + "eval_f1_for_task329_gap_coreference_resolution": 46.0, + "eval_f1_for_task330_gap_coreference_resolution": 68.8286, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.071, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 37.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 81.774, + "eval_f1_for_task418_persent_title_generation": 28.9196, + "eval_f1_for_task442_com_qa_question_rewriting": 71.1376, + "eval_f1_for_task500_scruples_title_generation": 21.9018, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.6566, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.0272, + "eval_f1_for_task602_wikitext_title_generation": 15.1685, + "eval_f1_for_task613_liar_keyword_tagging": 20.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 48.7049, + "eval_f1_for_task619_ohsumed_title_generation": 46.9236, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.2, + "eval_f1_for_task623_ohsumed_keyword_tagging": 96.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 39.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.5043, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.7693, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.4555, + "eval_f1_for_task677_ollie_data_to_text": 35.2353, + "eval_f1_for_task738_perspectrum_textual_entailment": 43.0, + "eval_f1_for_task743_eurlex_title_generation": 38.6046, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.468, + "eval_f1_for_task769_qed_title_generation": 85.8763, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 66.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 65.9667, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 58.5107, + "eval_f1_for_task970_sherliic_textual_entailment": 70.0, + "eval_f1_for_textual_entailment": 36.5833, + "eval_f1_for_title_generation": 38.215, + "eval_f1_for_word_analogy": 41.25, + "eval_gen_len": 9.3165, + "eval_global_step": 1500, + "eval_loss": 1.1518067121505737, + "eval_rouge1": 51.912, + "eval_rouge1_for_answerability_classification": 55.4872, + "eval_rouge1_for_cause_effect_classification": 62.6493, + "eval_rouge1_for_coreference_resolution": 49.3997, + "eval_rouge1_for_data_to_text": 56.4622, + "eval_rouge1_for_dialogue_act_recognition": 58.1587, + "eval_rouge1_for_grammar_error_correction": 62.232, + "eval_rouge1_for_keyword_tagging": 66.8608, + "eval_rouge1_for_overlap_extraction": 37.111, + "eval_rouge1_for_question_rewriting": 71.9559, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.108, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.647, + "eval_rouge1_for_task036_qasc_keyword_tagging": 74.6661, + "eval_rouge1_for_task039_qasc_overlap_extraction": 27.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 66.0, + "eval_rouge1_for_task102_commongen_data_to_text": 69.8821, + "eval_rouge1_for_task1152_bard_word_analogy": 32.0, + "eval_rouge1_for_task1153_bard_word_analogy": 31.0, + "eval_rouge1_for_task1154_bard_word_analogy": 27.0, + "eval_rouge1_for_task1155_bard_word_analogy": 68.0, + "eval_rouge1_for_task1156_bard_word_analogy": 42.0, + "eval_rouge1_for_task1157_bard_word_analogy": 55.0, + "eval_rouge1_for_task1158_bard_word_analogy": 38.0, + "eval_rouge1_for_task1159_bard_word_analogy": 39.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.6332, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.9155, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.4376, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.8378, + "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.8992, + "eval_rouge1_for_task1356_xlsum_title_generation": 27.0428, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.9838, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 70.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 72.7778, + "eval_rouge1_for_task1407_dart_data_to_text": 35.7785, + "eval_rouge1_for_task1409_dart_data_to_text": 48.7368, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8574, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 27.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.8272, + "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6066, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.7885, + "eval_rouge1_for_task1586_scifact_title_generation": 40.1626, + "eval_rouge1_for_task1598_nyc_data_to_text": 54.1319, + "eval_rouge1_for_task1612_sick_textual_entailment": 36.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.9472, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.3135, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1659_billsum_title_generation": 41.1682, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.1714, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.4147, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 73.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 27.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.8174, + "eval_rouge1_for_task220_rocstories_title_generation": 90.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 40.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 58.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.4, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.722, + "eval_rouge1_for_task288_gigaword_title_generation": 33.1476, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 39.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 68.7571, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.5559, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.1383, + "eval_rouge1_for_task418_persent_title_generation": 32.7467, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.4565, + "eval_rouge1_for_task500_scruples_title_generation": 23.3076, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.3709, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.2396, + "eval_rouge1_for_task602_wikitext_title_generation": 15.8367, + "eval_rouge1_for_task613_liar_keyword_tagging": 32.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 54.6559, + "eval_rouge1_for_task619_ohsumed_title_generation": 50.6789, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 96.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.0043, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 11.1667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.8278, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.349, + "eval_rouge1_for_task677_ollie_data_to_text": 38.549, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 89.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.5448, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.5547, + "eval_rouge1_for_task769_qed_title_generation": 85.7772, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 66.3, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 60.6068, + "eval_rouge1_for_task970_sherliic_textual_entailment": 70.0, + "eval_rouge1_for_textual_entailment": 44.8611, + "eval_rouge1_for_title_generation": 40.616, + "eval_rouge1_for_word_analogy": 41.5, + "eval_rougeL": 50.4083, + "eval_rougeL_for_answerability_classification": 55.4872, + "eval_rougeL_for_cause_effect_classification": 62.0993, + "eval_rougeL_for_coreference_resolution": 49.3997, + "eval_rougeL_for_data_to_text": 48.3117, + "eval_rougeL_for_dialogue_act_recognition": 58.1587, + "eval_rougeL_for_grammar_error_correction": 61.5416, + "eval_rougeL_for_keyword_tagging": 66.4408, + "eval_rougeL_for_overlap_extraction": 36.0229, + "eval_rougeL_for_question_rewriting": 68.1701, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 87.8166, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.5845, + "eval_rougeL_for_task036_qasc_keyword_tagging": 73.8994, + "eval_rougeL_for_task039_qasc_overlap_extraction": 27.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 66.0, + "eval_rougeL_for_task102_commongen_data_to_text": 59.3213, + "eval_rougeL_for_task1152_bard_word_analogy": 32.0, + "eval_rougeL_for_task1153_bard_word_analogy": 31.0, + "eval_rougeL_for_task1154_bard_word_analogy": 27.0, + "eval_rougeL_for_task1155_bard_word_analogy": 68.0, + "eval_rougeL_for_task1156_bard_word_analogy": 42.0, + "eval_rougeL_for_task1157_bard_word_analogy": 55.0, + "eval_rougeL_for_task1158_bard_word_analogy": 38.0, + "eval_rougeL_for_task1159_bard_word_analogy": 39.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.161, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1995, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.0345, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.9282, + "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.3422, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.2099, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.1584, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 70.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 72.7778, + "eval_rougeL_for_task1407_dart_data_to_text": 28.8067, + "eval_rougeL_for_task1409_dart_data_to_text": 41.3613, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3415, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 27.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.7312, + "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7417, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.8887, + "eval_rougeL_for_task1586_scifact_title_generation": 33.0278, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.3314, + "eval_rougeL_for_task1612_sick_textual_entailment": 36.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.6828, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.1537, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1659_billsum_title_generation": 34.9158, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.1714, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.2268, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 73.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 27.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.8174, + "eval_rougeL_for_task220_rocstories_title_generation": 90.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 40.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 58.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.4, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.5459, + "eval_rougeL_for_task288_gigaword_title_generation": 28.674, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 39.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 68.7571, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.8652, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.7732, + "eval_rougeL_for_task418_persent_title_generation": 28.5022, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2962, + "eval_rougeL_for_task500_scruples_title_generation": 21.9897, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.7477, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.3665, + "eval_rougeL_for_task602_wikitext_title_generation": 15.8367, + "eval_rougeL_for_task613_liar_keyword_tagging": 32.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 51.4965, + "eval_rougeL_for_task619_ohsumed_title_generation": 43.0529, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.8, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 96.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.0043, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 11.1667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.4445, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.8089, + "eval_rougeL_for_task677_ollie_data_to_text": 30.1132, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 89.0, + "eval_rougeL_for_task743_eurlex_title_generation": 35.6043, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.4194, + "eval_rougeL_for_task769_qed_title_generation": 85.7772, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 66.3, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.0713, + "eval_rougeL_for_task970_sherliic_textual_entailment": 70.0, + "eval_rougeL_for_textual_entailment": 44.8611, + "eval_rougeL_for_title_generation": 37.2179, + "eval_rougeL_for_word_analogy": 41.5, + "eval_runtime": 886.217, + "eval_samples_per_second": 13.439, + "eval_steps_per_second": 0.841, + "step": 1500 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.0086, + "step": 2000 + }, + { + "epoch": 0.44, + "eval_exact_match": 32.5693, + "eval_exact_match_for_answerability_classification": 55.8462, + "eval_exact_match_for_cause_effect_classification": 49.7143, + "eval_exact_match_for_coreference_resolution": 46.0, + "eval_exact_match_for_data_to_text": 7.2639, + "eval_exact_match_for_dialogue_act_recognition": 47.2857, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 51.6, + "eval_exact_match_for_overlap_extraction": 12.0, + "eval_exact_match_for_question_rewriting": 3.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 39.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 73.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 35.0, + "eval_exact_match_for_task1153_bard_word_analogy": 32.0, + "eval_exact_match_for_task1154_bard_word_analogy": 28.0, + "eval_exact_match_for_task1155_bard_word_analogy": 93.0, + "eval_exact_match_for_task1156_bard_word_analogy": 39.0, + "eval_exact_match_for_task1157_bard_word_analogy": 68.0, + "eval_exact_match_for_task1158_bard_word_analogy": 51.0, + "eval_exact_match_for_task1159_bard_word_analogy": 32.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 16.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 18.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 80.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 55.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 53.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 40.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 5.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 43.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 80.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 15.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 16.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 88.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 65.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 95.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 17.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 41.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 60.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 81.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 70.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 74.0, + "eval_exact_match_for_textual_entailment": 36.4583, + "eval_exact_match_for_title_generation": 10.2018, + "eval_exact_match_for_word_analogy": 47.25, + "eval_f1": 48.628, + "eval_f1_for_answerability_classification": 58.4103, + "eval_f1_for_cause_effect_classification": 68.7473, + "eval_f1_for_coreference_resolution": 51.5515, + "eval_f1_for_data_to_text": 50.3659, + "eval_f1_for_dialogue_act_recognition": 50.3571, + "eval_f1_for_grammar_error_correction": 56.7643, + "eval_f1_for_keyword_tagging": 63.8965, + "eval_f1_for_overlap_extraction": 28.9575, + "eval_f1_for_question_rewriting": 60.295, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 56.0, + "eval_f1_for_task034_winogrande_question_rewriting": 12.6407, + "eval_f1_for_task035_winogrande_question_rewriting": 89.1751, + "eval_f1_for_task036_qasc_keyword_tagging": 65.4347, + "eval_f1_for_task039_qasc_overlap_extraction": 32.5, + "eval_f1_for_task050_multirc_answerability_classification": 73.0, + "eval_f1_for_task102_commongen_data_to_text": 51.3894, + "eval_f1_for_task1152_bard_word_analogy": 35.0, + "eval_f1_for_task1153_bard_word_analogy": 32.0, + "eval_f1_for_task1154_bard_word_analogy": 28.0, + "eval_f1_for_task1155_bard_word_analogy": 93.0, + "eval_f1_for_task1156_bard_word_analogy": 39.0, + "eval_f1_for_task1157_bard_word_analogy": 68.0, + "eval_f1_for_task1158_bard_word_analogy": 51.0, + "eval_f1_for_task1159_bard_word_analogy": 32.0, + "eval_f1_for_task1161_coda_19_title_generation": 35.0684, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.1069, + "eval_f1_for_task121_atomic_question_rewriting": 47.5678, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.4092, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.3838, + "eval_f1_for_task1356_xlsum_title_generation": 21.7507, + "eval_f1_for_task1358_xlsum_title_generation": 34.7803, + "eval_f1_for_task1385_anli_textual_entailment": 1.0, + "eval_f1_for_task1386_anli_textual_entailment": 0.0, + "eval_f1_for_task1387_anli_textual_entailment": 2.0, + "eval_f1_for_task1388_cb_textual_entailment": 18.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 80.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_f1_for_task1407_dart_data_to_text": 32.2357, + "eval_f1_for_task1409_dart_data_to_text": 49.5846, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.7759, + "eval_f1_for_task1439_doqa_answerability_classification": 51.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.1614, + "eval_f1_for_task1554_scitail_textual_entailment": 55.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7527, + "eval_f1_for_task1562_zest_question_rewriting": 50.6264, + "eval_f1_for_task1586_scifact_title_generation": 34.7137, + "eval_f1_for_task1598_nyc_data_to_text": 48.4418, + "eval_f1_for_task1612_sick_textual_entailment": 38.0, + "eval_f1_for_task1615_sick_textual_entailment": 49.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.402, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 89.2479, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 34.8641, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 76.7143, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.5008, + "eval_f1_for_task190_snli_textual_entailment": 5.0, + "eval_f1_for_task199_multinli_textual_entailment": 43.0, + "eval_f1_for_task200_multinli_textual_entailment": 80.0, + "eval_f1_for_task201_multinli_textual_entailment": 15.0, + "eval_f1_for_task202_multinli_textual_entailment": 16.0, + "eval_f1_for_task219_rocstories_title_generation": 20.7356, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_f1_for_task232_iirc_answerability_classification": 47.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 88.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 73.65, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 25.4151, + "eval_f1_for_task288_gigaword_title_generation": 28.4976, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.4667, + "eval_f1_for_task329_gap_coreference_resolution": 53.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.2571, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.1557, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.0, + "eval_f1_for_task402_grailqa_question_rewriting": 74.8723, + "eval_f1_for_task418_persent_title_generation": 27.596, + "eval_f1_for_task442_com_qa_question_rewriting": 70.0985, + "eval_f1_for_task500_scruples_title_generation": 21.3575, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.4634, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.0628, + "eval_f1_for_task602_wikitext_title_generation": 13.6254, + "eval_f1_for_task613_liar_keyword_tagging": 22.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 49.0753, + "eval_f1_for_task619_ohsumed_title_generation": 41.4911, + "eval_f1_for_task620_ohsumed_keyword_tagging": 43.6333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 95.0, + "eval_f1_for_task640_e_snli_textual_entailment": 41.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 32.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.7476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.316, + "eval_f1_for_task671_ambigqa_question_rewriting": 38.055, + "eval_f1_for_task677_ollie_data_to_text": 26.3887, + "eval_f1_for_task738_perspectrum_textual_entailment": 41.0, + "eval_f1_for_task743_eurlex_title_generation": 38.8254, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.1938, + "eval_f1_for_task769_qed_title_generation": 76.3913, + "eval_f1_for_task827_copa_cause_effect_classification": 81.0, + "eval_f1_for_task828_copa_cause_effect_classification": 70.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 64.3, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 52.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 56.6228, + "eval_f1_for_task970_sherliic_textual_entailment": 74.0, + "eval_f1_for_textual_entailment": 36.4583, + "eval_f1_for_title_generation": 36.6375, + "eval_f1_for_word_analogy": 47.25, + "eval_gen_len": 8.5377, + "eval_global_step": 2000, + "eval_loss": 1.1825075149536133, + "eval_rouge1": 51.6979, + "eval_rouge1_for_answerability_classification": 58.4103, + "eval_rouge1_for_cause_effect_classification": 69.8435, + "eval_rouge1_for_coreference_resolution": 52.1668, + "eval_rouge1_for_data_to_text": 53.2583, + "eval_rouge1_for_dialogue_act_recognition": 55.2, + "eval_rouge1_for_grammar_error_correction": 61.6428, + "eval_rouge1_for_keyword_tagging": 69.326, + "eval_rouge1_for_overlap_extraction": 31.3019, + "eval_rouge1_for_question_rewriting": 61.9636, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 56.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 12.6084, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.6894, + "eval_rouge1_for_task036_qasc_keyword_tagging": 74.3966, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 73.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.805, + "eval_rouge1_for_task1152_bard_word_analogy": 35.0, + "eval_rouge1_for_task1153_bard_word_analogy": 32.0, + "eval_rouge1_for_task1154_bard_word_analogy": 28.0, + "eval_rouge1_for_task1155_bard_word_analogy": 93.0, + "eval_rouge1_for_task1156_bard_word_analogy": 39.0, + "eval_rouge1_for_task1157_bard_word_analogy": 68.0, + "eval_rouge1_for_task1158_bard_word_analogy": 51.0, + "eval_rouge1_for_task1159_bard_word_analogy": 32.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 39.3688, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.2858, + "eval_rouge1_for_task121_atomic_question_rewriting": 50.1911, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.3663, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.5026, + "eval_rouge1_for_task1356_xlsum_title_generation": 25.7794, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.1358, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 80.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 54.9, + "eval_rouge1_for_task1407_dart_data_to_text": 32.8903, + "eval_rouge1_for_task1409_dart_data_to_text": 50.1621, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.206, + "eval_rouge1_for_task1439_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.3048, + "eval_rouge1_for_task1554_scitail_textual_entailment": 55.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0795, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.9742, + "eval_rouge1_for_task1586_scifact_title_generation": 38.9634, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.9892, + "eval_rouge1_for_task1612_sick_textual_entailment": 38.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.7977, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 89.5518, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.5339, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 76.7143, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.354, + "eval_rouge1_for_task190_snli_textual_entailment": 5.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 43.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 80.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 15.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 16.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.1462, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 88.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 74.3167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 26.2705, + "eval_rouge1_for_task288_gigaword_title_generation": 31.6224, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.4667, + "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.1857, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.9741, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.5341, + "eval_rouge1_for_task418_persent_title_generation": 31.1757, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.3904, + "eval_rouge1_for_task500_scruples_title_generation": 22.8908, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.1014, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.9904, + "eval_rouge1_for_task602_wikitext_title_generation": 14.4861, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.4667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 55.9305, + "eval_rouge1_for_task619_ohsumed_title_generation": 44.7968, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.519, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 95.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.2476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 23.1667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.7685, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 39.8578, + "eval_rouge1_for_task677_ollie_data_to_text": 29.1317, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.5519, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4083, + "eval_rouge1_for_task769_qed_title_generation": 76.3163, + "eval_rouge1_for_task827_copa_cause_effect_classification": 81.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 70.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 64.4857, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 58.3632, + "eval_rouge1_for_task970_sherliic_textual_entailment": 74.0, + "eval_rouge1_for_textual_entailment": 44.375, + "eval_rouge1_for_title_generation": 38.9693, + "eval_rouge1_for_word_analogy": 47.25, + "eval_rougeL": 50.2785, + "eval_rougeL_for_answerability_classification": 58.4103, + "eval_rougeL_for_cause_effect_classification": 69.0917, + "eval_rougeL_for_coreference_resolution": 52.1668, + "eval_rougeL_for_data_to_text": 45.9878, + "eval_rougeL_for_dialogue_act_recognition": 55.2, + "eval_rougeL_for_grammar_error_correction": 60.8051, + "eval_rougeL_for_keyword_tagging": 68.9248, + "eval_rougeL_for_overlap_extraction": 30.9575, + "eval_rougeL_for_question_rewriting": 58.3781, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 56.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 12.6084, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.2846, + "eval_rougeL_for_task036_qasc_keyword_tagging": 73.8906, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 73.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.4687, + "eval_rougeL_for_task1152_bard_word_analogy": 35.0, + "eval_rougeL_for_task1153_bard_word_analogy": 32.0, + "eval_rougeL_for_task1154_bard_word_analogy": 28.0, + "eval_rougeL_for_task1155_bard_word_analogy": 93.0, + "eval_rougeL_for_task1156_bard_word_analogy": 39.0, + "eval_rougeL_for_task1157_bard_word_analogy": 68.0, + "eval_rougeL_for_task1158_bard_word_analogy": 51.0, + "eval_rougeL_for_task1159_bard_word_analogy": 32.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 32.3287, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.4262, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.0243, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.4048, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.5559, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.7047, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.8543, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 80.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 54.9, + "eval_rougeL_for_task1407_dart_data_to_text": 29.398, + "eval_rougeL_for_task1409_dart_data_to_text": 42.2821, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.4364, + "eval_rougeL_for_task1439_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.0368, + "eval_rougeL_for_task1554_scitail_textual_entailment": 55.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.1738, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.9538, + "eval_rougeL_for_task1586_scifact_title_generation": 31.6646, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.3689, + "eval_rougeL_for_task1612_sick_textual_entailment": 38.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.9553, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 88.2793, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.4539, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 76.7143, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.7552, + "eval_rougeL_for_task190_snli_textual_entailment": 5.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 43.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 80.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 15.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 16.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.1462, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 88.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 74.3167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 25.5818, + "eval_rougeL_for_task288_gigaword_title_generation": 28.0445, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.4667, + "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.1857, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9003, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.2308, + "eval_rougeL_for_task418_persent_title_generation": 26.5542, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.1779, + "eval_rougeL_for_task500_scruples_title_generation": 21.623, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.8344, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.6379, + "eval_rougeL_for_task602_wikitext_title_generation": 14.4861, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.4667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 51.7416, + "eval_rougeL_for_task619_ohsumed_title_generation": 35.3046, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.019, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 95.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.2476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 23.1667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.5185, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 36.4235, + "eval_rougeL_for_task677_ollie_data_to_text": 24.3758, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rougeL_for_task743_eurlex_title_generation": 36.2116, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.6061, + "eval_rougeL_for_task769_qed_title_generation": 76.3163, + "eval_rougeL_for_task827_copa_cause_effect_classification": 81.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 70.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 64.4857, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.4738, + "eval_rougeL_for_task970_sherliic_textual_entailment": 74.0, + "eval_rougeL_for_textual_entailment": 44.375, + "eval_rougeL_for_title_generation": 35.6104, + "eval_rougeL_for_word_analogy": 47.25, + "eval_runtime": 798.8387, + "eval_samples_per_second": 14.909, + "eval_steps_per_second": 0.933, + "step": 2000 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.9691, + "step": 2500 + }, + { + "epoch": 0.55, + "eval_exact_match": 34.2485, + "eval_exact_match_for_answerability_classification": 55.5385, + "eval_exact_match_for_cause_effect_classification": 45.1429, + "eval_exact_match_for_coreference_resolution": 46.2857, + "eval_exact_match_for_data_to_text": 9.5642, + "eval_exact_match_for_dialogue_act_recognition": 50.4286, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 49.8, + "eval_exact_match_for_overlap_extraction": 12.0, + "eval_exact_match_for_question_rewriting": 3.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 60.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 12.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 29.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 31.0, + "eval_exact_match_for_task1153_bard_word_analogy": 29.0, + "eval_exact_match_for_task1154_bard_word_analogy": 28.0, + "eval_exact_match_for_task1155_bard_word_analogy": 90.0, + "eval_exact_match_for_task1156_bard_word_analogy": 49.0, + "eval_exact_match_for_task1157_bard_word_analogy": 68.0, + "eval_exact_match_for_task1158_bard_word_analogy": 32.0, + "eval_exact_match_for_task1159_bard_word_analogy": 38.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 49.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 55.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 80.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 37.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 54.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 66.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 69.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 35.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 39.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 88.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 20.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 38.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 34.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 78.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 66.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 30.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 54.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 88.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 44.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 12.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 39.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 65.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 61.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 74.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, + "eval_exact_match_for_textual_entailment": 45.2083, + "eval_exact_match_for_title_generation": 10.426, + "eval_exact_match_for_word_analogy": 45.625, + "eval_f1": 51.6895, + "eval_f1_for_answerability_classification": 58.1026, + "eval_f1_for_cause_effect_classification": 65.4526, + "eval_f1_for_coreference_resolution": 53.1791, + "eval_f1_for_data_to_text": 52.6688, + "eval_f1_for_dialogue_act_recognition": 52.9286, + "eval_f1_for_grammar_error_correction": 60.1938, + "eval_f1_for_keyword_tagging": 62.4516, + "eval_f1_for_overlap_extraction": 32.997, + "eval_f1_for_question_rewriting": 68.973, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 61.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 86.7398, + "eval_f1_for_task035_winogrande_question_rewriting": 87.0801, + "eval_f1_for_task036_qasc_keyword_tagging": 65.2342, + "eval_f1_for_task039_qasc_overlap_extraction": 30.5, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 54.9571, + "eval_f1_for_task1152_bard_word_analogy": 31.0, + "eval_f1_for_task1153_bard_word_analogy": 30.3333, + "eval_f1_for_task1154_bard_word_analogy": 28.0, + "eval_f1_for_task1155_bard_word_analogy": 90.0, + "eval_f1_for_task1156_bard_word_analogy": 51.6667, + "eval_f1_for_task1157_bard_word_analogy": 68.0, + "eval_f1_for_task1158_bard_word_analogy": 32.0, + "eval_f1_for_task1159_bard_word_analogy": 38.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.1923, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.0997, + "eval_f1_for_task121_atomic_question_rewriting": 45.3274, + "eval_f1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7497, + "eval_f1_for_task1344_rte_textual_entailment": 55.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.219, + "eval_f1_for_task1356_xlsum_title_generation": 23.3855, + "eval_f1_for_task1358_xlsum_title_generation": 35.3216, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 31.0, + "eval_f1_for_task1387_anli_textual_entailment": 37.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 80.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_f1_for_task1407_dart_data_to_text": 32.8683, + "eval_f1_for_task1409_dart_data_to_text": 48.2031, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9791, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 37.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.431, + "eval_f1_for_task1554_scitail_textual_entailment": 54.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.4084, + "eval_f1_for_task1562_zest_question_rewriting": 47.423, + "eval_f1_for_task1586_scifact_title_generation": 37.5102, + "eval_f1_for_task1598_nyc_data_to_text": 49.8296, + "eval_f1_for_task1612_sick_textual_entailment": 46.0, + "eval_f1_for_task1615_sick_textual_entailment": 51.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.0209, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 66.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.0761, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_f1_for_task1659_billsum_title_generation": 37.6864, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.0952, + "eval_f1_for_task1728_web_nlg_data_to_text": 64.3073, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 39.0, + "eval_f1_for_task200_multinli_textual_entailment": 88.0, + "eval_f1_for_task201_multinli_textual_entailment": 10.0, + "eval_f1_for_task202_multinli_textual_entailment": 20.0, + "eval_f1_for_task219_rocstories_title_generation": 16.6469, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 38.0, + "eval_f1_for_task233_iirc_answerability_classification": 34.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 78.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 75.5548, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.494, + "eval_f1_for_task288_gigaword_title_generation": 29.7273, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 25.5, + "eval_f1_for_task329_gap_coreference_resolution": 60.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.7238, + "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.7144, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 33.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 79.422, + "eval_f1_for_task418_persent_title_generation": 28.5722, + "eval_f1_for_task442_com_qa_question_rewriting": 71.4856, + "eval_f1_for_task500_scruples_title_generation": 20.4129, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.7471, + "eval_f1_for_task520_aquamuse_answerability_classification": 54.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.4724, + "eval_f1_for_task602_wikitext_title_generation": 14.5839, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 46.4539, + "eval_f1_for_task619_ohsumed_title_generation": 43.1989, + "eval_f1_for_task620_ohsumed_keyword_tagging": 42.1333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 88.0, + "eval_f1_for_task640_e_snli_textual_entailment": 44.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 32.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.5571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.1878, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.6978, + "eval_f1_for_task677_ollie_data_to_text": 30.9479, + "eval_f1_for_task738_perspectrum_textual_entailment": 39.0, + "eval_f1_for_task743_eurlex_title_generation": 41.0011, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.5382, + "eval_f1_for_task769_qed_title_generation": 84.0147, + "eval_f1_for_task827_copa_cause_effect_classification": 61.0, + "eval_f1_for_task828_copa_cause_effect_classification": 74.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 66.1333, + "eval_f1_for_task892_gap_coreference_resolution": 53.0, + "eval_f1_for_task893_gap_coreference_resolution": 57.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 56.8947, + "eval_f1_for_task970_sherliic_textual_entailment": 71.0, + "eval_f1_for_textual_entailment": 45.2083, + "eval_f1_for_title_generation": 38.1906, + "eval_f1_for_word_analogy": 46.125, + "eval_gen_len": 8.7106, + "eval_global_step": 2500, + "eval_loss": 1.142754077911377, + "eval_rouge1": 53.7393, + "eval_rouge1_for_answerability_classification": 58.1026, + "eval_rouge1_for_cause_effect_classification": 66.4447, + "eval_rouge1_for_coreference_resolution": 53.9058, + "eval_rouge1_for_data_to_text": 55.6215, + "eval_rouge1_for_dialogue_act_recognition": 55.8302, + "eval_rouge1_for_grammar_error_correction": 65.1774, + "eval_rouge1_for_keyword_tagging": 67.7794, + "eval_rouge1_for_overlap_extraction": 34.9668, + "eval_rouge1_for_question_rewriting": 70.6251, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 86.7467, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.6667, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.8875, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.9385, + "eval_rouge1_for_task1152_bard_word_analogy": 31.0, + "eval_rouge1_for_task1153_bard_word_analogy": 32.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 28.0, + "eval_rouge1_for_task1155_bard_word_analogy": 90.0, + "eval_rouge1_for_task1156_bard_word_analogy": 51.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 68.0, + "eval_rouge1_for_task1158_bard_word_analogy": 32.0, + "eval_rouge1_for_task1159_bard_word_analogy": 38.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.3939, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.3379, + "eval_rouge1_for_task121_atomic_question_rewriting": 47.8858, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5682, + "eval_rouge1_for_task1344_rte_textual_entailment": 55.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.9941, + "eval_rouge1_for_task1356_xlsum_title_generation": 27.6528, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.4469, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 80.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.6444, + "eval_rouge1_for_task1407_dart_data_to_text": 33.7826, + "eval_rouge1_for_task1409_dart_data_to_text": 48.8506, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.8362, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 37.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.9953, + "eval_rouge1_for_task1554_scitail_textual_entailment": 54.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.5186, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.0265, + "eval_rouge1_for_task1586_scifact_title_generation": 41.6241, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.3021, + "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.5026, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 66.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.1728, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.8907, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.0952, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 65.922, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 39.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 88.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 20.0, + "eval_rouge1_for_task219_rocstories_title_generation": 21.1636, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 38.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 34.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 78.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 75.9, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 36.6003, + "eval_rouge1_for_task288_gigaword_title_generation": 32.2571, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 25.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 60.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.4857, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.0059, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.7252, + "eval_rouge1_for_task418_persent_title_generation": 32.6972, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6994, + "eval_rouge1_for_task500_scruples_title_generation": 22.9424, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.2483, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 54.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.6358, + "eval_rouge1_for_task602_wikitext_title_generation": 15.2526, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 53.1067, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.9216, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.119, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 88.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 44.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.0571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.4222, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.8694, + "eval_rouge1_for_task677_ollie_data_to_text": 33.5564, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rouge1_for_task743_eurlex_title_generation": 42.9665, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.8298, + "eval_rouge1_for_task769_qed_title_generation": 83.6953, + "eval_rouge1_for_task827_copa_cause_effect_classification": 61.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 74.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 66.3667, + "eval_rouge1_for_task892_gap_coreference_resolution": 53.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 57.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.3333, + "eval_rouge1_for_task957_e2e_data_to_text": 58.8729, + "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, + "eval_rouge1_for_textual_entailment": 48.4167, + "eval_rouge1_for_title_generation": 40.6901, + "eval_rouge1_for_word_analogy": 46.375, + "eval_rougeL": 52.3248, + "eval_rougeL_for_answerability_classification": 58.1026, + "eval_rougeL_for_cause_effect_classification": 65.9609, + "eval_rougeL_for_coreference_resolution": 53.9058, + "eval_rougeL_for_data_to_text": 47.6652, + "eval_rougeL_for_dialogue_act_recognition": 55.8302, + "eval_rougeL_for_grammar_error_correction": 64.239, + "eval_rougeL_for_keyword_tagging": 67.254, + "eval_rougeL_for_overlap_extraction": 34.761, + "eval_rougeL_for_question_rewriting": 67.1198, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 85.5371, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.3988, + "eval_rougeL_for_task036_qasc_keyword_tagging": 71.1605, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.699, + "eval_rougeL_for_task1152_bard_word_analogy": 31.0, + "eval_rougeL_for_task1153_bard_word_analogy": 32.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 28.0, + "eval_rougeL_for_task1155_bard_word_analogy": 90.0, + "eval_rougeL_for_task1156_bard_word_analogy": 51.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 68.0, + "eval_rougeL_for_task1158_bard_word_analogy": 32.0, + "eval_rougeL_for_task1159_bard_word_analogy": 38.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 36.191, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.4627, + "eval_rougeL_for_task121_atomic_question_rewriting": 42.349, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.8198, + "eval_rougeL_for_task1344_rte_textual_entailment": 55.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.3441, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.7932, + "eval_rougeL_for_task1358_xlsum_title_generation": 34.3862, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 80.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.6444, + "eval_rougeL_for_task1407_dart_data_to_text": 29.3917, + "eval_rougeL_for_task1409_dart_data_to_text": 41.8091, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.8657, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 37.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.1265, + "eval_rougeL_for_task1554_scitail_textual_entailment": 54.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.6124, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.4407, + "eval_rougeL_for_task1586_scifact_title_generation": 34.6299, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.522, + "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.649, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 66.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0928, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.5288, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.0952, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.7906, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 39.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 88.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 20.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.1636, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 38.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 34.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 78.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 75.9, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 36.1887, + "eval_rougeL_for_task288_gigaword_title_generation": 29.1865, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 25.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 60.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.4857, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9383, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.6112, + "eval_rougeL_for_task418_persent_title_generation": 27.6132, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2991, + "eval_rougeL_for_task500_scruples_title_generation": 21.873, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.8854, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 54.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.1317, + "eval_rougeL_for_task602_wikitext_title_generation": 15.2526, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 50.7883, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.7592, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.219, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 88.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 44.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.0571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.6442, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.5814, + "eval_rougeL_for_task677_ollie_data_to_text": 27.2494, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rougeL_for_task743_eurlex_title_generation": 39.837, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.6408, + "eval_rougeL_for_task769_qed_title_generation": 83.2953, + "eval_rougeL_for_task827_copa_cause_effect_classification": 61.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 74.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 66.3667, + "eval_rougeL_for_task892_gap_coreference_resolution": 53.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 57.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.3333, + "eval_rougeL_for_task957_e2e_data_to_text": 44.4332, + "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, + "eval_rougeL_for_textual_entailment": 48.4167, + "eval_rougeL_for_title_generation": 37.5579, + "eval_rougeL_for_word_analogy": 46.375, + "eval_runtime": 756.5153, + "eval_samples_per_second": 15.743, + "eval_steps_per_second": 0.985, + "step": 2500 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.9425, + "step": 3000 + }, + { + "epoch": 0.66, + "eval_exact_match": 33.4593, + "eval_exact_match_for_answerability_classification": 58.7692, + "eval_exact_match_for_cause_effect_classification": 46.8571, + "eval_exact_match_for_coreference_resolution": 44.8571, + "eval_exact_match_for_data_to_text": 5.8111, + "eval_exact_match_for_dialogue_act_recognition": 49.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 44.0, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 4.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 10.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 34.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 78.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 33.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 86.0, + "eval_exact_match_for_task1156_bard_word_analogy": 51.0, + "eval_exact_match_for_task1157_bard_word_analogy": 68.0, + "eval_exact_match_for_task1158_bard_word_analogy": 41.0, + "eval_exact_match_for_task1159_bard_word_analogy": 31.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 44.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 56.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 12.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 12.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 2.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 56.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 86.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 48.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 59.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 73.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 42.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 52.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 38.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 36.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 16.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 47.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 9.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 68.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 83.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 58.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 66.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 27.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 83.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 20.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 36.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 62.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 75.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 68.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 70.0, + "eval_exact_match_for_textual_entailment": 42.5417, + "eval_exact_match_for_title_generation": 10.3139, + "eval_exact_match_for_word_analogy": 45.125, + "eval_f1": 50.8785, + "eval_f1_for_answerability_classification": 61.3333, + "eval_f1_for_cause_effect_classification": 67.6117, + "eval_f1_for_coreference_resolution": 50.4406, + "eval_f1_for_data_to_text": 50.216, + "eval_f1_for_dialogue_act_recognition": 52.2143, + "eval_f1_for_grammar_error_correction": 56.2215, + "eval_f1_for_keyword_tagging": 58.742, + "eval_f1_for_overlap_extraction": 28.6979, + "eval_f1_for_question_rewriting": 70.2636, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 90.0753, + "eval_f1_for_task035_winogrande_question_rewriting": 90.5739, + "eval_f1_for_task036_qasc_keyword_tagging": 68.8957, + "eval_f1_for_task039_qasc_overlap_extraction": 35.8333, + "eval_f1_for_task050_multirc_answerability_classification": 78.0, + "eval_f1_for_task102_commongen_data_to_text": 51.407, + "eval_f1_for_task1152_bard_word_analogy": 33.0, + "eval_f1_for_task1153_bard_word_analogy": 30.0, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 86.0, + "eval_f1_for_task1156_bard_word_analogy": 53.0, + "eval_f1_for_task1157_bard_word_analogy": 68.0, + "eval_f1_for_task1158_bard_word_analogy": 41.0, + "eval_f1_for_task1159_bard_word_analogy": 32.3333, + "eval_f1_for_task1161_coda_19_title_generation": 37.3732, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.7671, + "eval_f1_for_task121_atomic_question_rewriting": 49.6023, + "eval_f1_for_task133_winowhy_coreference_resolution": 44.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0269, + "eval_f1_for_task1344_rte_textual_entailment": 56.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.8882, + "eval_f1_for_task1356_xlsum_title_generation": 24.0064, + "eval_f1_for_task1358_xlsum_title_generation": 37.5489, + "eval_f1_for_task1385_anli_textual_entailment": 0.0, + "eval_f1_for_task1386_anli_textual_entailment": 12.0, + "eval_f1_for_task1387_anli_textual_entailment": 12.0, + "eval_f1_for_task1388_cb_textual_entailment": 2.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 56.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 86.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_f1_for_task1407_dart_data_to_text": 29.2217, + "eval_f1_for_task1409_dart_data_to_text": 46.9676, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.9268, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 48.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 59.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.7526, + "eval_f1_for_task1554_scitail_textual_entailment": 73.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5161, + "eval_f1_for_task1562_zest_question_rewriting": 52.7322, + "eval_f1_for_task1586_scifact_title_generation": 37.5251, + "eval_f1_for_task1598_nyc_data_to_text": 49.233, + "eval_f1_for_task1612_sick_textual_entailment": 42.0, + "eval_f1_for_task1615_sick_textual_entailment": 52.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.5989, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 86.612, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_f1_for_task1659_billsum_title_generation": 36.7646, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.8952, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.0526, + "eval_f1_for_task190_snli_textual_entailment": 16.0, + "eval_f1_for_task199_multinli_textual_entailment": 47.0, + "eval_f1_for_task200_multinli_textual_entailment": 92.0, + "eval_f1_for_task201_multinli_textual_entailment": 9.0, + "eval_f1_for_task202_multinli_textual_entailment": 68.0, + "eval_f1_for_task219_rocstories_title_generation": 15.9651, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_f1_for_task232_iirc_answerability_classification": 52.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 83.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 67.8833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 21.5626, + "eval_f1_for_task288_gigaword_title_generation": 30.1653, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 5.6667, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.8571, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.0488, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 40.5, + "eval_f1_for_task402_grailqa_question_rewriting": 79.4593, + "eval_f1_for_task418_persent_title_generation": 28.9443, + "eval_f1_for_task442_com_qa_question_rewriting": 71.0544, + "eval_f1_for_task500_scruples_title_generation": 23.2158, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.6912, + "eval_f1_for_task520_aquamuse_answerability_classification": 66.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.2997, + "eval_f1_for_task602_wikitext_title_generation": 14.5955, + "eval_f1_for_task613_liar_keyword_tagging": 24.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 48.8997, + "eval_f1_for_task619_ohsumed_title_generation": 44.6661, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.3714, + "eval_f1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_f1_for_task640_e_snli_textual_entailment": 27.0, + "eval_f1_for_task641_e_snli_textual_entailment": 38.0, + "eval_f1_for_task642_e_snli_textual_entailment": 43.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.7762, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 24.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.4057, + "eval_f1_for_task671_ambigqa_question_rewriting": 59.7423, + "eval_f1_for_task677_ollie_data_to_text": 29.4465, + "eval_f1_for_task738_perspectrum_textual_entailment": 36.0, + "eval_f1_for_task743_eurlex_title_generation": 38.7688, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.4603, + "eval_f1_for_task769_qed_title_generation": 84.8026, + "eval_f1_for_task827_copa_cause_effect_classification": 75.0, + "eval_f1_for_task828_copa_cause_effect_classification": 68.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 61.5333, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 57.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task957_e2e_data_to_text": 58.9039, + "eval_f1_for_task970_sherliic_textual_entailment": 70.0, + "eval_f1_for_textual_entailment": 42.5417, + "eval_f1_for_title_generation": 38.048, + "eval_f1_for_word_analogy": 45.7917, + "eval_gen_len": 8.9207, + "eval_global_step": 3000, + "eval_loss": 1.1522873640060425, + "eval_rouge1": 53.9387, + "eval_rouge1_for_answerability_classification": 61.3333, + "eval_rouge1_for_cause_effect_classification": 68.4727, + "eval_rouge1_for_coreference_resolution": 50.9284, + "eval_rouge1_for_data_to_text": 53.3645, + "eval_rouge1_for_dialogue_act_recognition": 55.8531, + "eval_rouge1_for_grammar_error_correction": 61.428, + "eval_rouge1_for_keyword_tagging": 63.5254, + "eval_rouge1_for_overlap_extraction": 32.2686, + "eval_rouge1_for_question_rewriting": 71.9542, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 90.0859, + "eval_rouge1_for_task035_winogrande_question_rewriting": 91.0772, + "eval_rouge1_for_task036_qasc_keyword_tagging": 73.5959, + "eval_rouge1_for_task039_qasc_overlap_extraction": 41.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 78.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.0767, + "eval_rouge1_for_task1152_bard_word_analogy": 33.0, + "eval_rouge1_for_task1153_bard_word_analogy": 32.0, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 86.0, + "eval_rouge1_for_task1156_bard_word_analogy": 53.0, + "eval_rouge1_for_task1157_bard_word_analogy": 68.0, + "eval_rouge1_for_task1158_bard_word_analogy": 41.0, + "eval_rouge1_for_task1159_bard_word_analogy": 32.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.5097, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.9802, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.4977, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 44.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.9835, + "eval_rouge1_for_task1344_rte_textual_entailment": 56.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.0003, + "eval_rouge1_for_task1356_xlsum_title_generation": 28.069, + "eval_rouge1_for_task1358_xlsum_title_generation": 41.7859, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 39.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 56.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 86.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.4714, + "eval_rouge1_for_task1407_dart_data_to_text": 30.4581, + "eval_rouge1_for_task1409_dart_data_to_text": 47.7981, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.23, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 48.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 59.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.6947, + "eval_rouge1_for_task1554_scitail_textual_entailment": 73.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.626, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.9863, + "eval_rouge1_for_task1586_scifact_title_generation": 41.2764, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.2139, + "eval_rouge1_for_task1612_sick_textual_entailment": 42.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 84.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.8626, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 86.8368, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.3236, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.8952, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.3975, + "eval_rouge1_for_task190_snli_textual_entailment": 16.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 47.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 9.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 68.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.4033, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 83.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 68.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 22.8705, + "eval_rouge1_for_task288_gigaword_title_generation": 32.931, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.619, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.2417, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.1525, + "eval_rouge1_for_task418_persent_title_generation": 31.9181, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.2205, + "eval_rouge1_for_task500_scruples_title_generation": 24.675, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.0804, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 66.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.9996, + "eval_rouge1_for_task602_wikitext_title_generation": 15.1281, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 54.7341, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.4103, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.2548, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 27.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.2762, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 24.9, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.5574, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 61.0752, + "eval_rouge1_for_task677_ollie_data_to_text": 32.2841, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.7058, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.6136, + "eval_rouge1_for_task769_qed_title_generation": 84.3701, + "eval_rouge1_for_task827_copa_cause_effect_classification": 75.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 68.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 61.8667, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 57.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task957_e2e_data_to_text": 60.7457, + "eval_rouge1_for_task970_sherliic_textual_entailment": 70.0, + "eval_rouge1_for_textual_entailment": 50.75, + "eval_rouge1_for_title_generation": 40.35, + "eval_rouge1_for_word_analogy": 46.0417, + "eval_rougeL": 52.4679, + "eval_rougeL_for_answerability_classification": 61.3333, + "eval_rougeL_for_cause_effect_classification": 67.6453, + "eval_rougeL_for_coreference_resolution": 50.9284, + "eval_rougeL_for_data_to_text": 45.6479, + "eval_rougeL_for_dialogue_act_recognition": 55.8531, + "eval_rougeL_for_grammar_error_correction": 60.2718, + "eval_rougeL_for_keyword_tagging": 62.9174, + "eval_rougeL_for_overlap_extraction": 32.0406, + "eval_rougeL_for_question_rewriting": 68.1186, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.3778, + "eval_rougeL_for_task035_winogrande_question_rewriting": 91.0147, + "eval_rougeL_for_task036_qasc_keyword_tagging": 71.7229, + "eval_rougeL_for_task039_qasc_overlap_extraction": 41.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 78.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.7992, + "eval_rougeL_for_task1152_bard_word_analogy": 33.0, + "eval_rougeL_for_task1153_bard_word_analogy": 32.0, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 86.0, + "eval_rougeL_for_task1156_bard_word_analogy": 53.0, + "eval_rougeL_for_task1157_bard_word_analogy": 68.0, + "eval_rougeL_for_task1158_bard_word_analogy": 41.0, + "eval_rougeL_for_task1159_bard_word_analogy": 32.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.962, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.5772, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.9486, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 44.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7075, + "eval_rougeL_for_task1344_rte_textual_entailment": 56.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4161, + "eval_rougeL_for_task1356_xlsum_title_generation": 24.0208, + "eval_rougeL_for_task1358_xlsum_title_generation": 35.9685, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 39.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 56.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 86.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.4714, + "eval_rougeL_for_task1407_dart_data_to_text": 26.5895, + "eval_rougeL_for_task1409_dart_data_to_text": 40.5012, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.7825, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 48.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 59.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.8732, + "eval_rougeL_for_task1554_scitail_textual_entailment": 73.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7611, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.2234, + "eval_rougeL_for_task1586_scifact_title_generation": 35.7865, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.237, + "eval_rougeL_for_task1612_sick_textual_entailment": 42.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 84.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.857, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 83.5432, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rougeL_for_task1659_billsum_title_generation": 32.8447, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.8952, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.9073, + "eval_rougeL_for_task190_snli_textual_entailment": 16.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 47.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 9.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 68.0, + "eval_rougeL_for_task219_rocstories_title_generation": 20.4033, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 83.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 68.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 22.4146, + "eval_rougeL_for_task288_gigaword_title_generation": 28.5342, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.619, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.4301, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.5917, + "eval_rougeL_for_task418_persent_title_generation": 27.3998, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.0225, + "eval_rougeL_for_task500_scruples_title_generation": 23.5748, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.6257, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 66.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.3337, + "eval_rougeL_for_task602_wikitext_title_generation": 15.1281, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 49.7536, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.3073, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.0881, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 27.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.2762, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 24.9, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.7673, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.5078, + "eval_rougeL_for_task677_ollie_data_to_text": 26.2226, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rougeL_for_task743_eurlex_title_generation": 35.7905, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.6347, + "eval_rougeL_for_task769_qed_title_generation": 84.3701, + "eval_rougeL_for_task827_copa_cause_effect_classification": 75.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 68.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 61.8667, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 57.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.5268, + "eval_rougeL_for_task970_sherliic_textual_entailment": 70.0, + "eval_rougeL_for_textual_entailment": 50.75, + "eval_rougeL_for_title_generation": 37.1194, + "eval_rougeL_for_word_analogy": 46.0417, + "eval_runtime": 817.9683, + "eval_samples_per_second": 14.56, + "eval_steps_per_second": 0.911, + "step": 3000 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.9165, + "step": 3500 + }, + { + "epoch": 0.76, + "eval_exact_match": 33.3417, + "eval_exact_match_for_answerability_classification": 53.6923, + "eval_exact_match_for_cause_effect_classification": 50.8571, + "eval_exact_match_for_coreference_resolution": 43.0714, + "eval_exact_match_for_data_to_text": 6.1743, + "eval_exact_match_for_dialogue_act_recognition": 56.2857, + "eval_exact_match_for_grammar_error_correction": 6.0, + "eval_exact_match_for_keyword_tagging": 44.2, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 5.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 12.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 74.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 35.0, + "eval_exact_match_for_task1153_bard_word_analogy": 38.0, + "eval_exact_match_for_task1154_bard_word_analogy": 18.0, + "eval_exact_match_for_task1155_bard_word_analogy": 70.0, + "eval_exact_match_for_task1156_bard_word_analogy": 44.0, + "eval_exact_match_for_task1157_bard_word_analogy": 64.0, + "eval_exact_match_for_task1158_bard_word_analogy": 41.0, + "eval_exact_match_for_task1159_bard_word_analogy": 35.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 15.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 36.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 11.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 20.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 8.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 23.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 83.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 53.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 12.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 47.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 41.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 34.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 38.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 88.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 53.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 16.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 9.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 90.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 72.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 69.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 78.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 81.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 57.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 37.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 58.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, + "eval_exact_match_for_textual_entailment": 42.25, + "eval_exact_match_for_title_generation": 10.9305, + "eval_exact_match_for_word_analogy": 43.125, + "eval_f1": 50.2713, + "eval_f1_for_answerability_classification": 56.2564, + "eval_f1_for_cause_effect_classification": 66.6183, + "eval_f1_for_coreference_resolution": 48.7451, + "eval_f1_for_data_to_text": 49.7935, + "eval_f1_for_dialogue_act_recognition": 58.2857, + "eval_f1_for_grammar_error_correction": 58.8077, + "eval_f1_for_keyword_tagging": 59.5765, + "eval_f1_for_overlap_extraction": 24.9249, + "eval_f1_for_question_rewriting": 70.8573, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 59.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.2461, + "eval_f1_for_task035_winogrande_question_rewriting": 90.2607, + "eval_f1_for_task036_qasc_keyword_tagging": 63.5394, + "eval_f1_for_task039_qasc_overlap_extraction": 33.1333, + "eval_f1_for_task050_multirc_answerability_classification": 74.0, + "eval_f1_for_task102_commongen_data_to_text": 54.1071, + "eval_f1_for_task1152_bard_word_analogy": 35.0, + "eval_f1_for_task1153_bard_word_analogy": 38.0, + "eval_f1_for_task1154_bard_word_analogy": 18.0, + "eval_f1_for_task1155_bard_word_analogy": 70.0, + "eval_f1_for_task1156_bard_word_analogy": 46.0, + "eval_f1_for_task1157_bard_word_analogy": 64.0, + "eval_f1_for_task1158_bard_word_analogy": 41.0, + "eval_f1_for_task1159_bard_word_analogy": 35.6667, + "eval_f1_for_task1161_coda_19_title_generation": 38.0801, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.9294, + "eval_f1_for_task121_atomic_question_rewriting": 48.2806, + "eval_f1_for_task133_winowhy_coreference_resolution": 36.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.7652, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.6614, + "eval_f1_for_task1356_xlsum_title_generation": 21.4939, + "eval_f1_for_task1358_xlsum_title_generation": 34.4378, + "eval_f1_for_task1385_anli_textual_entailment": 11.0, + "eval_f1_for_task1386_anli_textual_entailment": 20.0, + "eval_f1_for_task1387_anli_textual_entailment": 8.0, + "eval_f1_for_task1388_cb_textual_entailment": 23.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 83.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_f1_for_task1407_dart_data_to_text": 25.236, + "eval_f1_for_task1409_dart_data_to_text": 50.1476, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.0208, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 35.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.6955, + "eval_f1_for_task1554_scitail_textual_entailment": 53.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5945, + "eval_f1_for_task1562_zest_question_rewriting": 48.2683, + "eval_f1_for_task1586_scifact_title_generation": 37.2157, + "eval_f1_for_task1598_nyc_data_to_text": 52.44, + "eval_f1_for_task1612_sick_textual_entailment": 47.0, + "eval_f1_for_task1615_sick_textual_entailment": 51.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 82.023, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 86.2193, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_f1_for_task1659_billsum_title_generation": 36.9436, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.3524, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.9515, + "eval_f1_for_task190_snli_textual_entailment": 38.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 88.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 53.0, + "eval_f1_for_task219_rocstories_title_generation": 19.7143, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 16.0, + "eval_f1_for_task233_iirc_answerability_classification": 9.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 90.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.2881, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 16.7164, + "eval_f1_for_task288_gigaword_title_generation": 30.6161, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 11.3333, + "eval_f1_for_task329_gap_coreference_resolution": 55.0, + "eval_f1_for_task330_gap_coreference_resolution": 72.2571, + "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 89.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.8622, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 36.5, + "eval_f1_for_task402_grailqa_question_rewriting": 77.798, + "eval_f1_for_task418_persent_title_generation": 29.8855, + "eval_f1_for_task442_com_qa_question_rewriting": 71.5636, + "eval_f1_for_task500_scruples_title_generation": 19.1438, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.1779, + "eval_f1_for_task520_aquamuse_answerability_classification": 78.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.1691, + "eval_f1_for_task602_wikitext_title_generation": 13.5455, + "eval_f1_for_task613_liar_keyword_tagging": 20.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 34.4659, + "eval_f1_for_task619_ohsumed_title_generation": 44.8529, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.6667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 81.0, + "eval_f1_for_task640_e_snli_textual_entailment": 34.0, + "eval_f1_for_task641_e_snli_textual_entailment": 57.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.0095, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.1891, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.2099, + "eval_f1_for_task677_ollie_data_to_text": 24.4884, + "eval_f1_for_task738_perspectrum_textual_entailment": 37.0, + "eval_f1_for_task743_eurlex_title_generation": 39.4145, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.7518, + "eval_f1_for_task769_qed_title_generation": 84.496, + "eval_f1_for_task827_copa_cause_effect_classification": 84.0, + "eval_f1_for_task828_copa_cause_effect_classification": 58.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 67.3667, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 44.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 57.9487, + "eval_f1_for_task970_sherliic_textual_entailment": 51.0, + "eval_f1_for_textual_entailment": 42.25, + "eval_f1_for_title_generation": 38.1996, + "eval_f1_for_word_analogy": 43.4583, + "eval_gen_len": 8.5626, + "eval_global_step": 3500, + "eval_loss": 1.1878433227539062, + "eval_rouge1": 52.9417, + "eval_rouge1_for_answerability_classification": 56.2564, + "eval_rouge1_for_cause_effect_classification": 67.1225, + "eval_rouge1_for_coreference_resolution": 49.2337, + "eval_rouge1_for_data_to_text": 52.8806, + "eval_rouge1_for_dialogue_act_recognition": 61.9952, + "eval_rouge1_for_grammar_error_correction": 63.518, + "eval_rouge1_for_keyword_tagging": 64.3309, + "eval_rouge1_for_overlap_extraction": 27.5853, + "eval_rouge1_for_question_rewriting": 72.6076, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 59.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2944, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.8616, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.759, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.9667, + "eval_rouge1_for_task050_multirc_answerability_classification": 74.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.5781, + "eval_rouge1_for_task1152_bard_word_analogy": 35.0, + "eval_rouge1_for_task1153_bard_word_analogy": 39.0, + "eval_rouge1_for_task1154_bard_word_analogy": 18.0, + "eval_rouge1_for_task1155_bard_word_analogy": 70.0, + "eval_rouge1_for_task1156_bard_word_analogy": 46.0, + "eval_rouge1_for_task1157_bard_word_analogy": 64.0, + "eval_rouge1_for_task1158_bard_word_analogy": 41.0, + "eval_rouge1_for_task1159_bard_word_analogy": 35.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.7523, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.0648, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.2163, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 36.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.6427, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.9037, + "eval_rouge1_for_task1356_xlsum_title_generation": 25.6949, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.019, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 83.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 71.3, + "eval_rouge1_for_task1407_dart_data_to_text": 27.296, + "eval_rouge1_for_task1409_dart_data_to_text": 50.6694, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.4076, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.7929, + "eval_rouge1_for_task1554_scitail_textual_entailment": 53.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6285, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.3911, + "eval_rouge1_for_task1586_scifact_title_generation": 40.8638, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.0112, + "eval_rouge1_for_task1612_sick_textual_entailment": 47.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.3027, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 86.3224, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.5605, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.3524, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.3824, + "eval_rouge1_for_task190_snli_textual_entailment": 38.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 88.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 53.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.5025, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 16.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 9.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 90.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.1333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.2039, + "eval_rouge1_for_task288_gigaword_title_generation": 33.9259, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 11.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 72.019, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 89.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.879, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.0825, + "eval_rouge1_for_task418_persent_title_generation": 33.0012, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6555, + "eval_rouge1_for_task500_scruples_title_generation": 20.3932, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.9846, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 78.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.9743, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3787, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.6333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 37.9788, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.0823, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.7524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 81.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 57.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.5095, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.5662, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.3452, + "eval_rouge1_for_task677_ollie_data_to_text": 26.7644, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.9408, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.7006, + "eval_rouge1_for_task769_qed_title_generation": 84.0433, + "eval_rouge1_for_task827_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 58.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 67.6, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task957_e2e_data_to_text": 60.0273, + "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, + "eval_rouge1_for_textual_entailment": 48.7778, + "eval_rouge1_for_title_generation": 40.485, + "eval_rouge1_for_word_analogy": 43.5833, + "eval_rougeL": 51.4795, + "eval_rougeL_for_answerability_classification": 56.2564, + "eval_rougeL_for_cause_effect_classification": 66.6269, + "eval_rougeL_for_coreference_resolution": 49.2337, + "eval_rougeL_for_data_to_text": 44.7792, + "eval_rougeL_for_dialogue_act_recognition": 61.9952, + "eval_rougeL_for_grammar_error_correction": 62.8819, + "eval_rougeL_for_keyword_tagging": 63.7003, + "eval_rougeL_for_overlap_extraction": 27.4863, + "eval_rougeL_for_question_rewriting": 68.7929, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 59.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.0831, + "eval_rougeL_for_task035_winogrande_question_rewriting": 90.2973, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.9397, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.9667, + "eval_rougeL_for_task050_multirc_answerability_classification": 74.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.7558, + "eval_rougeL_for_task1152_bard_word_analogy": 35.0, + "eval_rougeL_for_task1153_bard_word_analogy": 39.0, + "eval_rougeL_for_task1154_bard_word_analogy": 18.0, + "eval_rougeL_for_task1155_bard_word_analogy": 70.0, + "eval_rougeL_for_task1156_bard_word_analogy": 46.0, + "eval_rougeL_for_task1157_bard_word_analogy": 64.0, + "eval_rougeL_for_task1158_bard_word_analogy": 41.0, + "eval_rougeL_for_task1159_bard_word_analogy": 35.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.4842, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 82.6618, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.0751, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 36.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.5736, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5908, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.8324, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.8299, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 83.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 71.3, + "eval_rougeL_for_task1407_dart_data_to_text": 21.0998, + "eval_rougeL_for_task1409_dart_data_to_text": 42.8631, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.0711, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.0709, + "eval_rougeL_for_task1554_scitail_textual_entailment": 53.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6928, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.891, + "eval_rougeL_for_task1586_scifact_title_generation": 34.61, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.8825, + "eval_rougeL_for_task1612_sick_textual_entailment": 47.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 81.0317, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 82.7388, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 67.0, + "eval_rougeL_for_task1659_billsum_title_generation": 32.7661, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.3524, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.8762, + "eval_rougeL_for_task190_snli_textual_entailment": 38.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 88.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 53.0, + "eval_rougeL_for_task219_rocstories_title_generation": 22.1692, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 16.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 9.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 90.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.1333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.006, + "eval_rougeL_for_task288_gigaword_title_generation": 29.5043, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 11.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 72.019, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 89.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.7922, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.5184, + "eval_rougeL_for_task418_persent_title_generation": 28.5932, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8621, + "eval_rougeL_for_task500_scruples_title_generation": 19.4019, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.4126, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 78.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.1513, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3787, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.6333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 35.5961, + "eval_rougeL_for_task619_ohsumed_title_generation": 42.047, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.419, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 81.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 57.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.5095, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 5.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.9802, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.7305, + "eval_rougeL_for_task677_ollie_data_to_text": 22.336, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rougeL_for_task743_eurlex_title_generation": 36.5079, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.0478, + "eval_rougeL_for_task769_qed_title_generation": 84.0433, + "eval_rougeL_for_task827_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 58.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 67.6, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task957_e2e_data_to_text": 45.7519, + "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, + "eval_rougeL_for_textual_entailment": 48.7778, + "eval_rougeL_for_title_generation": 37.28, + "eval_rougeL_for_word_analogy": 43.5833, + "eval_runtime": 779.0369, + "eval_samples_per_second": 15.288, + "eval_steps_per_second": 0.956, + "step": 3500 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.8813, + "step": 4000 + }, + { + "epoch": 0.87, + "eval_exact_match": 34.8279, + "eval_exact_match_for_answerability_classification": 59.4615, + "eval_exact_match_for_cause_effect_classification": 49.0, + "eval_exact_match_for_coreference_resolution": 40.2143, + "eval_exact_match_for_data_to_text": 6.7797, + "eval_exact_match_for_dialogue_act_recognition": 52.7143, + "eval_exact_match_for_grammar_error_correction": 5.5, + "eval_exact_match_for_keyword_tagging": 48.2, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 5.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 57.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 13.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 10.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 36.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 77.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 33.0, + "eval_exact_match_for_task1153_bard_word_analogy": 35.0, + "eval_exact_match_for_task1154_bard_word_analogy": 22.0, + "eval_exact_match_for_task1155_bard_word_analogy": 70.0, + "eval_exact_match_for_task1156_bard_word_analogy": 50.0, + "eval_exact_match_for_task1157_bard_word_analogy": 61.0, + "eval_exact_match_for_task1158_bard_word_analogy": 41.0, + "eval_exact_match_for_task1159_bard_word_analogy": 37.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 2.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 79.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 39.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 61.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 66.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 50.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 46.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 61.0, + "eval_exact_match_for_task1659_billsum_title_generation": 4.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 33.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 30.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 14.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 54.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 95.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 28.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 69.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 77.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 58.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 21.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 26.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 64.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 61.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 50.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 73.0, + "eval_exact_match_for_textual_entailment": 49.1667, + "eval_exact_match_for_title_generation": 10.5381, + "eval_exact_match_for_word_analogy": 43.625, + "eval_f1": 52.1346, + "eval_f1_for_answerability_classification": 62.0256, + "eval_f1_for_cause_effect_classification": 68.0187, + "eval_f1_for_coreference_resolution": 47.1522, + "eval_f1_for_data_to_text": 51.4215, + "eval_f1_for_dialogue_act_recognition": 56.0714, + "eval_f1_for_grammar_error_correction": 57.3579, + "eval_f1_for_keyword_tagging": 60.1316, + "eval_f1_for_overlap_extraction": 22.9449, + "eval_f1_for_question_rewriting": 70.075, + "eval_f1_for_task020_mctaco_answerability_classification": 57.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 61.0, + "eval_f1_for_task034_winogrande_question_rewriting": 91.7499, + "eval_f1_for_task035_winogrande_question_rewriting": 88.371, + "eval_f1_for_task036_qasc_keyword_tagging": 66.677, + "eval_f1_for_task039_qasc_overlap_extraction": 25.8333, + "eval_f1_for_task050_multirc_answerability_classification": 77.0, + "eval_f1_for_task102_commongen_data_to_text": 53.0097, + "eval_f1_for_task1152_bard_word_analogy": 33.0, + "eval_f1_for_task1153_bard_word_analogy": 35.0, + "eval_f1_for_task1154_bard_word_analogy": 22.0, + "eval_f1_for_task1155_bard_word_analogy": 70.0, + "eval_f1_for_task1156_bard_word_analogy": 50.6667, + "eval_f1_for_task1157_bard_word_analogy": 61.0, + "eval_f1_for_task1158_bard_word_analogy": 41.0, + "eval_f1_for_task1159_bard_word_analogy": 39.6667, + "eval_f1_for_task1161_coda_19_title_generation": 36.9733, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.2243, + "eval_f1_for_task121_atomic_question_rewriting": 48.2642, + "eval_f1_for_task133_winowhy_coreference_resolution": 2.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2335, + "eval_f1_for_task1344_rte_textual_entailment": 79.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.5058, + "eval_f1_for_task1356_xlsum_title_generation": 25.3364, + "eval_f1_for_task1358_xlsum_title_generation": 34.8236, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 29.0, + "eval_f1_for_task1387_anli_textual_entailment": 30.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_f1_for_task1407_dart_data_to_text": 32.6811, + "eval_f1_for_task1409_dart_data_to_text": 48.7223, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.6161, + "eval_f1_for_task1439_doqa_answerability_classification": 47.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 39.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 61.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.0087, + "eval_f1_for_task1554_scitail_textual_entailment": 66.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.0997, + "eval_f1_for_task1562_zest_question_rewriting": 51.1277, + "eval_f1_for_task1586_scifact_title_generation": 36.0219, + "eval_f1_for_task1598_nyc_data_to_text": 50.2699, + "eval_f1_for_task1612_sick_textual_entailment": 46.0, + "eval_f1_for_task1615_sick_textual_entailment": 50.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.5858, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_f1_for_task1631_open_pi_data_to_text": 90.1601, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 61.0, + "eval_f1_for_task1659_billsum_title_generation": 38.1501, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.1845, + "eval_f1_for_task1728_web_nlg_data_to_text": 62.3398, + "eval_f1_for_task190_snli_textual_entailment": 30.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 92.0, + "eval_f1_for_task201_multinli_textual_entailment": 14.0, + "eval_f1_for_task202_multinli_textual_entailment": 54.0, + "eval_f1_for_task219_rocstories_title_generation": 20.0659, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_f1_for_task232_iirc_answerability_classification": 46.0, + "eval_f1_for_task233_iirc_answerability_classification": 48.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 95.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.05, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 20.0565, + "eval_f1_for_task288_gigaword_title_generation": 31.5246, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.3667, + "eval_f1_for_task329_gap_coreference_resolution": 54.0, + "eval_f1_for_task330_gap_coreference_resolution": 71.7302, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.2741, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 76.6901, + "eval_f1_for_task418_persent_title_generation": 28.6284, + "eval_f1_for_task442_com_qa_question_rewriting": 71.2869, + "eval_f1_for_task500_scruples_title_generation": 20.4923, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.3491, + "eval_f1_for_task520_aquamuse_answerability_classification": 69.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 41.6305, + "eval_f1_for_task602_wikitext_title_generation": 13.5234, + "eval_f1_for_task613_liar_keyword_tagging": 24.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 46.5235, + "eval_f1_for_task619_ohsumed_title_generation": 47.7001, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.5333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 77.0, + "eval_f1_for_task640_e_snli_textual_entailment": 36.0, + "eval_f1_for_task641_e_snli_textual_entailment": 58.0, + "eval_f1_for_task642_e_snli_textual_entailment": 42.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 39.7667, + "eval_f1_for_task670_ambigqa_question_rewriting": 77.9779, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.0412, + "eval_f1_for_task677_ollie_data_to_text": 29.913, + "eval_f1_for_task738_perspectrum_textual_entailment": 26.0, + "eval_f1_for_task743_eurlex_title_generation": 37.2477, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.4966, + "eval_f1_for_task769_qed_title_generation": 81.4304, + "eval_f1_for_task827_copa_cause_effect_classification": 84.0, + "eval_f1_for_task828_copa_cause_effect_classification": 61.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_f1_for_task890_gwsd_textual_entailment": 50.0, + "eval_f1_for_task891_gap_coreference_resolution": 70.8667, + "eval_f1_for_task892_gap_coreference_resolution": 43.0, + "eval_f1_for_task893_gap_coreference_resolution": 44.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 55.6967, + "eval_f1_for_task970_sherliic_textual_entailment": 73.0, + "eval_f1_for_textual_entailment": 49.1667, + "eval_f1_for_title_generation": 38.3955, + "eval_f1_for_word_analogy": 44.0417, + "eval_gen_len": 8.8208, + "eval_global_step": 4000, + "eval_loss": 1.1603326797485352, + "eval_rouge1": 54.3961, + "eval_rouge1_for_answerability_classification": 62.0256, + "eval_rouge1_for_cause_effect_classification": 68.8615, + "eval_rouge1_for_coreference_resolution": 47.8147, + "eval_rouge1_for_data_to_text": 54.4904, + "eval_rouge1_for_dialogue_act_recognition": 59.5503, + "eval_rouge1_for_grammar_error_correction": 62.408, + "eval_rouge1_for_keyword_tagging": 66.0656, + "eval_rouge1_for_overlap_extraction": 26.4228, + "eval_rouge1_for_question_rewriting": 71.5568, + "eval_rouge1_for_task020_mctaco_answerability_classification": 57.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.7641, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.09, + "eval_rouge1_for_task036_qasc_keyword_tagging": 79.4278, + "eval_rouge1_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 77.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.7983, + "eval_rouge1_for_task1152_bard_word_analogy": 33.0, + "eval_rouge1_for_task1153_bard_word_analogy": 37.0, + "eval_rouge1_for_task1154_bard_word_analogy": 22.0, + "eval_rouge1_for_task1155_bard_word_analogy": 70.0, + "eval_rouge1_for_task1156_bard_word_analogy": 50.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 61.0, + "eval_rouge1_for_task1158_bard_word_analogy": 41.0, + "eval_rouge1_for_task1159_bard_word_analogy": 39.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.2772, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.3876, + "eval_rouge1_for_task121_atomic_question_rewriting": 50.1002, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 2.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.0105, + "eval_rouge1_for_task1344_rte_textual_entailment": 79.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.6367, + "eval_rouge1_for_task1356_xlsum_title_generation": 29.786, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.9871, + "eval_rouge1_for_task1385_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.6857, + "eval_rouge1_for_task1407_dart_data_to_text": 33.2393, + "eval_rouge1_for_task1409_dart_data_to_text": 49.4239, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.4818, + "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 39.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 61.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.14, + "eval_rouge1_for_task1554_scitail_textual_entailment": 66.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.3343, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.3669, + "eval_rouge1_for_task1586_scifact_title_generation": 39.815, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.0558, + "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.8666, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.3266, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 61.0, + "eval_rouge1_for_task1659_billsum_title_generation": 40.2099, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.1845, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.5979, + "eval_rouge1_for_task190_snli_textual_entailment": 30.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 14.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 54.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.1009, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 95.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.7167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 21.1789, + "eval_rouge1_for_task288_gigaword_title_generation": 35.059, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 13.3667, + "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 71.519, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.3804, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.4777, + "eval_rouge1_for_task418_persent_title_generation": 31.5346, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.5832, + "eval_rouge1_for_task500_scruples_title_generation": 21.5209, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.481, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 69.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.9213, + "eval_rouge1_for_task602_wikitext_title_generation": 14.4758, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 52.3169, + "eval_rouge1_for_task619_ohsumed_title_generation": 50.9191, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.619, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 77.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 58.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 40.4, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.0793, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.772, + "eval_rouge1_for_task677_ollie_data_to_text": 32.9372, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rouge1_for_task743_eurlex_title_generation": 38.7778, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.4854, + "eval_rouge1_for_task769_qed_title_generation": 81.384, + "eval_rouge1_for_task827_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 61.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 50.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 71.0524, + "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.7656, + "eval_rouge1_for_task970_sherliic_textual_entailment": 73.0, + "eval_rouge1_for_textual_entailment": 53.1806, + "eval_rouge1_for_title_generation": 40.8119, + "eval_rouge1_for_word_analogy": 44.2917, + "eval_rougeL": 52.9869, + "eval_rougeL_for_answerability_classification": 62.0256, + "eval_rougeL_for_cause_effect_classification": 68.3127, + "eval_rougeL_for_coreference_resolution": 47.8147, + "eval_rougeL_for_data_to_text": 46.7266, + "eval_rougeL_for_dialogue_act_recognition": 59.5503, + "eval_rougeL_for_grammar_error_correction": 61.6506, + "eval_rougeL_for_keyword_tagging": 65.7656, + "eval_rougeL_for_overlap_extraction": 26.2829, + "eval_rougeL_for_question_rewriting": 68.1835, + "eval_rougeL_for_task020_mctaco_answerability_classification": 57.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.5835, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.1674, + "eval_rougeL_for_task036_qasc_keyword_tagging": 79.2611, + "eval_rougeL_for_task039_qasc_overlap_extraction": 31.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 77.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.6057, + "eval_rougeL_for_task1152_bard_word_analogy": 33.0, + "eval_rougeL_for_task1153_bard_word_analogy": 37.0, + "eval_rougeL_for_task1154_bard_word_analogy": 22.0, + "eval_rougeL_for_task1155_bard_word_analogy": 70.0, + "eval_rougeL_for_task1156_bard_word_analogy": 50.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 61.0, + "eval_rougeL_for_task1158_bard_word_analogy": 41.0, + "eval_rougeL_for_task1159_bard_word_analogy": 39.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.6544, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8268, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.1248, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 2.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.2798, + "eval_rougeL_for_task1344_rte_textual_entailment": 79.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5595, + "eval_rougeL_for_task1356_xlsum_title_generation": 25.7197, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.5774, + "eval_rougeL_for_task1385_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.6857, + "eval_rougeL_for_task1407_dart_data_to_text": 27.8955, + "eval_rougeL_for_task1409_dart_data_to_text": 42.0132, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.8318, + "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 39.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 61.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.9235, + "eval_rougeL_for_task1554_scitail_textual_entailment": 66.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.4694, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.0607, + "eval_rougeL_for_task1586_scifact_title_generation": 33.2412, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.4759, + "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.6311, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 88.9137, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 61.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.6315, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.1845, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.7449, + "eval_rougeL_for_task190_snli_textual_entailment": 30.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 14.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 54.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.7009, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 95.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.7167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 20.8992, + "eval_rougeL_for_task288_gigaword_title_generation": 30.7814, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 13.3667, + "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 71.519, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.2119, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.382, + "eval_rougeL_for_task418_persent_title_generation": 27.393, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3942, + "eval_rougeL_for_task500_scruples_title_generation": 20.3706, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.3259, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 69.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.6388, + "eval_rougeL_for_task602_wikitext_title_generation": 14.2926, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 49.6434, + "eval_rougeL_for_task619_ohsumed_title_generation": 43.0571, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.2857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 77.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 58.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 40.4, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.8845, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.4045, + "eval_rougeL_for_task677_ollie_data_to_text": 27.7822, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.3768, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.4169, + "eval_rougeL_for_task769_qed_title_generation": 80.9395, + "eval_rougeL_for_task827_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 61.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 50.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 71.0524, + "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.8622, + "eval_rougeL_for_task970_sherliic_textual_entailment": 73.0, + "eval_rougeL_for_textual_entailment": 53.1806, + "eval_rougeL_for_title_generation": 37.4786, + "eval_rougeL_for_word_analogy": 44.2917, + "eval_runtime": 812.825, + "eval_samples_per_second": 14.653, + "eval_steps_per_second": 0.917, + "step": 4000 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.9129, + "step": 4500 + }, + { + "epoch": 0.98, + "eval_exact_match": 35.8858, + "eval_exact_match_for_answerability_classification": 62.9231, + "eval_exact_match_for_cause_effect_classification": 50.0, + "eval_exact_match_for_coreference_resolution": 46.8571, + "eval_exact_match_for_data_to_text": 4.9637, + "eval_exact_match_for_dialogue_act_recognition": 57.2857, + "eval_exact_match_for_grammar_error_correction": 5.5, + "eval_exact_match_for_keyword_tagging": 50.0, + "eval_exact_match_for_overlap_extraction": 21.5, + "eval_exact_match_for_question_rewriting": 4.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 59.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 4.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 43.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 63.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 26.0, + "eval_exact_match_for_task1153_bard_word_analogy": 43.0, + "eval_exact_match_for_task1154_bard_word_analogy": 21.0, + "eval_exact_match_for_task1155_bard_word_analogy": 81.0, + "eval_exact_match_for_task1156_bard_word_analogy": 60.0, + "eval_exact_match_for_task1157_bard_word_analogy": 62.0, + "eval_exact_match_for_task1158_bard_word_analogy": 41.0, + "eval_exact_match_for_task1159_bard_word_analogy": 36.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 18.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 43.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 16.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 22.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 30.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 61.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 49.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 69.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 32.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_exact_match_for_task1659_billsum_title_generation": 4.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 46.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 46.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 14.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 19.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 58.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 96.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 56.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 73.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 59.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 91.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 75.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 40.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 91.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 63.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 46.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 70.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 74.0, + "eval_exact_match_for_textual_entailment": 45.375, + "eval_exact_match_for_title_generation": 11.0426, + "eval_exact_match_for_word_analogy": 46.25, + "eval_f1": 53.1037, + "eval_f1_for_answerability_classification": 65.4872, + "eval_f1_for_cause_effect_classification": 68.4711, + "eval_f1_for_coreference_resolution": 51.7532, + "eval_f1_for_data_to_text": 50.9563, + "eval_f1_for_dialogue_act_recognition": 59.2143, + "eval_f1_for_grammar_error_correction": 68.4925, + "eval_f1_for_keyword_tagging": 61.8349, + "eval_f1_for_overlap_extraction": 32.97, + "eval_f1_for_question_rewriting": 70.519, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 61.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 88.3167, + "eval_f1_for_task035_winogrande_question_rewriting": 90.273, + "eval_f1_for_task036_qasc_keyword_tagging": 75.527, + "eval_f1_for_task039_qasc_overlap_extraction": 48.8333, + "eval_f1_for_task050_multirc_answerability_classification": 63.0, + "eval_f1_for_task102_commongen_data_to_text": 53.0451, + "eval_f1_for_task1152_bard_word_analogy": 26.0, + "eval_f1_for_task1153_bard_word_analogy": 43.0, + "eval_f1_for_task1154_bard_word_analogy": 21.0, + "eval_f1_for_task1155_bard_word_analogy": 81.0, + "eval_f1_for_task1156_bard_word_analogy": 60.6667, + "eval_f1_for_task1157_bard_word_analogy": 62.0, + "eval_f1_for_task1158_bard_word_analogy": 41.0, + "eval_f1_for_task1159_bard_word_analogy": 36.0, + "eval_f1_for_task1161_coda_19_title_generation": 39.5543, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.8792, + "eval_f1_for_task121_atomic_question_rewriting": 49.1417, + "eval_f1_for_task133_winowhy_coreference_resolution": 43.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.7028, + "eval_f1_for_task1344_rte_textual_entailment": 54.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.4444, + "eval_f1_for_task1356_xlsum_title_generation": 24.0678, + "eval_f1_for_task1358_xlsum_title_generation": 35.8365, + "eval_f1_for_task1385_anli_textual_entailment": 16.0, + "eval_f1_for_task1386_anli_textual_entailment": 22.0, + "eval_f1_for_task1387_anli_textual_entailment": 30.0, + "eval_f1_for_task1388_cb_textual_entailment": 30.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_f1_for_task1407_dart_data_to_text": 33.406, + "eval_f1_for_task1409_dart_data_to_text": 47.3477, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.4987, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 61.0, + "eval_f1_for_task1516_imppres_textual_entailment": 49.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 40.9423, + "eval_f1_for_task1554_scitail_textual_entailment": 58.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4863, + "eval_f1_for_task1562_zest_question_rewriting": 54.1629, + "eval_f1_for_task1586_scifact_title_generation": 37.1278, + "eval_f1_for_task1598_nyc_data_to_text": 50.2932, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 49.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.9036, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 69.0, + "eval_f1_for_task1631_open_pi_data_to_text": 87.0057, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_f1_for_task1659_billsum_title_generation": 37.3598, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 76.8048, + "eval_f1_for_task1728_web_nlg_data_to_text": 63.5453, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 46.0, + "eval_f1_for_task200_multinli_textual_entailment": 84.0, + "eval_f1_for_task201_multinli_textual_entailment": 14.0, + "eval_f1_for_task202_multinli_textual_entailment": 19.0, + "eval_f1_for_task219_rocstories_title_generation": 15.6825, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_f1_for_task232_iirc_answerability_classification": 58.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 96.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.8833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 17.1066, + "eval_f1_for_task288_gigaword_title_generation": 29.6092, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_f1_for_task329_gap_coreference_resolution": 64.0, + "eval_f1_for_task330_gap_coreference_resolution": 72.3238, + "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.8835, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 79.0134, + "eval_f1_for_task418_persent_title_generation": 29.4778, + "eval_f1_for_task442_com_qa_question_rewriting": 71.5523, + "eval_f1_for_task500_scruples_title_generation": 19.6283, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.0735, + "eval_f1_for_task520_aquamuse_answerability_classification": 91.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.0131, + "eval_f1_for_task602_wikitext_title_generation": 15.5887, + "eval_f1_for_task613_liar_keyword_tagging": 21.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 37.0811, + "eval_f1_for_task619_ohsumed_title_generation": 45.8186, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.9, + "eval_f1_for_task623_ohsumed_keyword_tagging": 75.0, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 49.0, + "eval_f1_for_task642_e_snli_textual_entailment": 41.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.081, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.7485, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.2733, + "eval_f1_for_task677_ollie_data_to_text": 30.9067, + "eval_f1_for_task738_perspectrum_textual_entailment": 40.0, + "eval_f1_for_task743_eurlex_title_generation": 34.7975, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.1227, + "eval_f1_for_task769_qed_title_generation": 86.8134, + "eval_f1_for_task827_copa_cause_effect_classification": 91.0, + "eval_f1_for_task828_copa_cause_effect_classification": 63.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_f1_for_task890_gwsd_textual_entailment": 46.0, + "eval_f1_for_task891_gap_coreference_resolution": 60.5333, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 70.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 54.5378, + "eval_f1_for_task970_sherliic_textual_entailment": 74.0, + "eval_f1_for_textual_entailment": 45.375, + "eval_f1_for_title_generation": 38.4865, + "eval_f1_for_word_analogy": 46.3333, + "eval_gen_len": 8.091, + "eval_global_step": 4500, + "eval_loss": 1.1735401153564453, + "eval_rouge1": 55.5064, + "eval_rouge1_for_answerability_classification": 65.4872, + "eval_rouge1_for_cause_effect_classification": 68.9307, + "eval_rouge1_for_coreference_resolution": 52.4189, + "eval_rouge1_for_data_to_text": 54.1071, + "eval_rouge1_for_dialogue_act_recognition": 63.2889, + "eval_rouge1_for_grammar_error_correction": 71.0756, + "eval_rouge1_for_keyword_tagging": 67.1289, + "eval_rouge1_for_overlap_extraction": 35.7253, + "eval_rouge1_for_question_rewriting": 72.0336, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.3224, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.8067, + "eval_rouge1_for_task036_qasc_keyword_tagging": 81.9778, + "eval_rouge1_for_task039_qasc_overlap_extraction": 53.8333, + "eval_rouge1_for_task050_multirc_answerability_classification": 63.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.0462, + "eval_rouge1_for_task1152_bard_word_analogy": 26.0, + "eval_rouge1_for_task1153_bard_word_analogy": 44.0, + "eval_rouge1_for_task1154_bard_word_analogy": 21.0, + "eval_rouge1_for_task1155_bard_word_analogy": 81.0, + "eval_rouge1_for_task1156_bard_word_analogy": 60.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 62.0, + "eval_rouge1_for_task1158_bard_word_analogy": 41.0, + "eval_rouge1_for_task1159_bard_word_analogy": 36.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.7803, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.1642, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.3571, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 43.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.7246, + "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.2617, + "eval_rouge1_for_task1356_xlsum_title_generation": 28.7221, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.1074, + "eval_rouge1_for_task1385_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.1889, + "eval_rouge1_for_task1407_dart_data_to_text": 34.5003, + "eval_rouge1_for_task1409_dart_data_to_text": 48.3588, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.4622, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 61.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 49.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 43.2584, + "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.689, + "eval_rouge1_for_task1562_zest_question_rewriting": 56.8503, + "eval_rouge1_for_task1586_scifact_title_generation": 40.9799, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.3137, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.1078, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 69.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 87.3201, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.4398, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 76.8048, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 65.1922, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 46.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 14.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 19.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.7121, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 58.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 96.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.05, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.6173, + "eval_rouge1_for_task288_gigaword_title_generation": 32.9165, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 64.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 72.1429, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.8707, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.2881, + "eval_rouge1_for_task418_persent_title_generation": 32.686, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.9407, + "eval_rouge1_for_task500_scruples_title_generation": 20.8929, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.7199, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 91.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.9219, + "eval_rouge1_for_task602_wikitext_title_generation": 16.5727, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.6333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.3111, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.2196, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.4524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 75.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.581, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.3333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.87, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.4006, + "eval_rouge1_for_task677_ollie_data_to_text": 33.75, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.0216, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2444, + "eval_rouge1_for_task769_qed_title_generation": 86.8476, + "eval_rouge1_for_task827_copa_cause_effect_classification": 91.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 63.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 56.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 46.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 60.8667, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 70.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 56.5998, + "eval_rouge1_for_task970_sherliic_textual_entailment": 74.0, + "eval_rouge1_for_textual_entailment": 50.4583, + "eval_rouge1_for_title_generation": 40.856, + "eval_rouge1_for_word_analogy": 46.4583, + "eval_rougeL": 54.0787, + "eval_rougeL_for_answerability_classification": 65.4872, + "eval_rougeL_for_cause_effect_classification": 68.4139, + "eval_rougeL_for_coreference_resolution": 52.4189, + "eval_rougeL_for_data_to_text": 45.9041, + "eval_rougeL_for_dialogue_act_recognition": 63.2889, + "eval_rougeL_for_grammar_error_correction": 69.9671, + "eval_rougeL_for_keyword_tagging": 66.8089, + "eval_rougeL_for_overlap_extraction": 35.5317, + "eval_rougeL_for_question_rewriting": 68.2641, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 86.9223, + "eval_rougeL_for_task035_winogrande_question_rewriting": 90.6572, + "eval_rougeL_for_task036_qasc_keyword_tagging": 81.7778, + "eval_rougeL_for_task039_qasc_overlap_extraction": 53.8333, + "eval_rougeL_for_task050_multirc_answerability_classification": 63.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.2271, + "eval_rougeL_for_task1152_bard_word_analogy": 26.0, + "eval_rougeL_for_task1153_bard_word_analogy": 44.0, + "eval_rougeL_for_task1154_bard_word_analogy": 21.0, + "eval_rougeL_for_task1155_bard_word_analogy": 81.0, + "eval_rougeL_for_task1156_bard_word_analogy": 60.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 62.0, + "eval_rougeL_for_task1158_bard_word_analogy": 41.0, + "eval_rougeL_for_task1159_bard_word_analogy": 36.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 36.5542, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 83.0689, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.45, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 43.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.7504, + "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5862, + "eval_rougeL_for_task1356_xlsum_title_generation": 25.3293, + "eval_rougeL_for_task1358_xlsum_title_generation": 34.0338, + "eval_rougeL_for_task1385_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.1889, + "eval_rougeL_for_task1407_dart_data_to_text": 29.118, + "eval_rougeL_for_task1409_dart_data_to_text": 42.9086, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 54.1666, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 61.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 49.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 53.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 40.3608, + "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7676, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.7461, + "eval_rougeL_for_task1586_scifact_title_generation": 35.1651, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.5292, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.0367, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 69.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 82.6198, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_rougeL_for_task1659_billsum_title_generation": 34.1951, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 76.8048, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 57.0907, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 46.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 14.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 19.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.7121, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 58.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 96.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.05, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.2302, + "eval_rougeL_for_task288_gigaword_title_generation": 29.3107, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 64.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 72.1429, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.2124, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.5059, + "eval_rougeL_for_task418_persent_title_generation": 28.6191, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.5296, + "eval_rougeL_for_task500_scruples_title_generation": 19.7443, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.6749, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 91.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.4451, + "eval_rougeL_for_task602_wikitext_title_generation": 16.5727, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.6333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.3514, + "eval_rougeL_for_task619_ohsumed_title_generation": 42.7701, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.0524, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 75.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.581, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.3333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.1171, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.285, + "eval_rougeL_for_task677_ollie_data_to_text": 27.1729, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rougeL_for_task743_eurlex_title_generation": 32.4427, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0257, + "eval_rougeL_for_task769_qed_title_generation": 86.8476, + "eval_rougeL_for_task827_copa_cause_effect_classification": 91.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 63.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 56.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 46.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 60.8667, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 70.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.7148, + "eval_rougeL_for_task970_sherliic_textual_entailment": 74.0, + "eval_rougeL_for_textual_entailment": 50.4583, + "eval_rougeL_for_title_generation": 37.8854, + "eval_rougeL_for_word_analogy": 46.4583, + "eval_runtime": 752.8471, + "eval_samples_per_second": 15.82, + "eval_steps_per_second": 0.99, + "step": 4500 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.7361, + "step": 5000 + }, + { + "epoch": 1.09, + "eval_exact_match": 35.4156, + "eval_exact_match_for_answerability_classification": 63.0769, + "eval_exact_match_for_cause_effect_classification": 50.0, + "eval_exact_match_for_coreference_resolution": 42.4286, + "eval_exact_match_for_data_to_text": 6.2954, + "eval_exact_match_for_dialogue_act_recognition": 54.7143, + "eval_exact_match_for_grammar_error_correction": 5.5, + "eval_exact_match_for_keyword_tagging": 47.8, + "eval_exact_match_for_overlap_extraction": 18.0, + "eval_exact_match_for_question_rewriting": 4.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 58.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 10.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 43.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 36.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 68.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 27.0, + "eval_exact_match_for_task1153_bard_word_analogy": 40.0, + "eval_exact_match_for_task1154_bard_word_analogy": 26.0, + "eval_exact_match_for_task1155_bard_word_analogy": 77.0, + "eval_exact_match_for_task1156_bard_word_analogy": 59.0, + "eval_exact_match_for_task1157_bard_word_analogy": 63.0, + "eval_exact_match_for_task1158_bard_word_analogy": 52.0, + "eval_exact_match_for_task1159_bard_word_analogy": 37.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 67.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 10.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 20.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 23.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 35.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 81.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 45.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 67.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 53.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 10.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 43.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 78.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 42.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 8.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 90.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 71.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 96.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 49.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 2.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 89.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 2.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 68.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 34.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 89.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 69.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 52.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 75.0, + "eval_exact_match_for_textual_entailment": 46.2083, + "eval_exact_match_for_title_generation": 11.0426, + "eval_exact_match_for_word_analogy": 47.625, + "eval_f1": 52.7092, + "eval_f1_for_answerability_classification": 65.641, + "eval_f1_for_cause_effect_classification": 69.3677, + "eval_f1_for_coreference_resolution": 47.7905, + "eval_f1_for_data_to_text": 50.6257, + "eval_f1_for_dialogue_act_recognition": 58.2857, + "eval_f1_for_grammar_error_correction": 68.2355, + "eval_f1_for_keyword_tagging": 59.3878, + "eval_f1_for_overlap_extraction": 30.1621, + "eval_f1_for_question_rewriting": 69.371, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 62.0, + "eval_f1_for_task034_winogrande_question_rewriting": 92.6735, + "eval_f1_for_task035_winogrande_question_rewriting": 88.4728, + "eval_f1_for_task036_qasc_keyword_tagging": 72.5151, + "eval_f1_for_task039_qasc_overlap_extraction": 44.5, + "eval_f1_for_task050_multirc_answerability_classification": 68.0, + "eval_f1_for_task102_commongen_data_to_text": 54.2971, + "eval_f1_for_task1152_bard_word_analogy": 27.0, + "eval_f1_for_task1153_bard_word_analogy": 40.0, + "eval_f1_for_task1154_bard_word_analogy": 26.0, + "eval_f1_for_task1155_bard_word_analogy": 77.0, + "eval_f1_for_task1156_bard_word_analogy": 59.6667, + "eval_f1_for_task1157_bard_word_analogy": 63.0, + "eval_f1_for_task1158_bard_word_analogy": 52.0, + "eval_f1_for_task1159_bard_word_analogy": 37.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.9281, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.8154, + "eval_f1_for_task121_atomic_question_rewriting": 47.5297, + "eval_f1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.9718, + "eval_f1_for_task1344_rte_textual_entailment": 67.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.4384, + "eval_f1_for_task1356_xlsum_title_generation": 24.1726, + "eval_f1_for_task1358_xlsum_title_generation": 36.2993, + "eval_f1_for_task1385_anli_textual_entailment": 10.0, + "eval_f1_for_task1386_anli_textual_entailment": 20.0, + "eval_f1_for_task1387_anli_textual_entailment": 23.0, + "eval_f1_for_task1388_cb_textual_entailment": 35.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 81.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 61.0, + "eval_f1_for_task1407_dart_data_to_text": 22.9083, + "eval_f1_for_task1409_dart_data_to_text": 49.3771, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.2536, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 59.0, + "eval_f1_for_task1516_imppres_textual_entailment": 45.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 39.0895, + "eval_f1_for_task1554_scitail_textual_entailment": 67.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2174, + "eval_f1_for_task1562_zest_question_rewriting": 52.7794, + "eval_f1_for_task1586_scifact_title_generation": 38.7743, + "eval_f1_for_task1598_nyc_data_to_text": 50.0597, + "eval_f1_for_task1612_sick_textual_entailment": 53.0, + "eval_f1_for_task1615_sick_textual_entailment": 49.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.7327, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_f1_for_task1631_open_pi_data_to_text": 90.5779, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 78.0, + "eval_f1_for_task1659_billsum_title_generation": 35.4098, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 74.1429, + "eval_f1_for_task1728_web_nlg_data_to_text": 66.6521, + "eval_f1_for_task190_snli_textual_entailment": 8.0, + "eval_f1_for_task199_multinli_textual_entailment": 45.0, + "eval_f1_for_task200_multinli_textual_entailment": 90.0, + "eval_f1_for_task201_multinli_textual_entailment": 10.0, + "eval_f1_for_task202_multinli_textual_entailment": 71.0, + "eval_f1_for_task219_rocstories_title_generation": 17.3932, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 96.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.3667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 15.8242, + "eval_f1_for_task288_gigaword_title_generation": 30.1704, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_f1_for_task329_gap_coreference_resolution": 58.0, + "eval_f1_for_task330_gap_coreference_resolution": 72.5238, + "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.0388, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 72.0955, + "eval_f1_for_task418_persent_title_generation": 30.3685, + "eval_f1_for_task442_com_qa_question_rewriting": 72.3198, + "eval_f1_for_task500_scruples_title_generation": 23.5154, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.8212, + "eval_f1_for_task520_aquamuse_answerability_classification": 89.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 41.0387, + "eval_f1_for_task602_wikitext_title_generation": 15.2259, + "eval_f1_for_task613_liar_keyword_tagging": 22.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 45.8685, + "eval_f1_for_task619_ohsumed_title_generation": 45.3233, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.0, + "eval_f1_for_task623_ohsumed_keyword_tagging": 68.0, + "eval_f1_for_task640_e_snli_textual_entailment": 40.0, + "eval_f1_for_task641_e_snli_textual_entailment": 30.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4238, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 73.002, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.2219, + "eval_f1_for_task677_ollie_data_to_text": 27.1842, + "eval_f1_for_task738_perspectrum_textual_entailment": 34.0, + "eval_f1_for_task743_eurlex_title_generation": 37.3277, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.4268, + "eval_f1_for_task769_qed_title_generation": 89.6489, + "eval_f1_for_task827_copa_cause_effect_classification": 89.0, + "eval_f1_for_task828_copa_cause_effect_classification": 69.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task890_gwsd_textual_entailment": 52.0, + "eval_f1_for_task891_gap_coreference_resolution": 66.7, + "eval_f1_for_task892_gap_coreference_resolution": 43.0, + "eval_f1_for_task893_gap_coreference_resolution": 64.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task957_e2e_data_to_text": 55.7006, + "eval_f1_for_task970_sherliic_textual_entailment": 75.0, + "eval_f1_for_textual_entailment": 46.2083, + "eval_f1_for_title_generation": 39.0158, + "eval_f1_for_word_analogy": 47.7083, + "eval_gen_len": 8.6991, + "eval_global_step": 5000, + "eval_loss": 1.265062928199768, + "eval_rouge1": 55.4027, + "eval_rouge1_for_answerability_classification": 65.641, + "eval_rouge1_for_cause_effect_classification": 70.0251, + "eval_rouge1_for_coreference_resolution": 48.5418, + "eval_rouge1_for_data_to_text": 53.8534, + "eval_rouge1_for_dialogue_act_recognition": 61.0122, + "eval_rouge1_for_grammar_error_correction": 71.4044, + "eval_rouge1_for_keyword_tagging": 65.1656, + "eval_rouge1_for_overlap_extraction": 34.3666, + "eval_rouge1_for_question_rewriting": 70.9566, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 62.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.719, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.1939, + "eval_rouge1_for_task036_qasc_keyword_tagging": 80.504, + "eval_rouge1_for_task039_qasc_overlap_extraction": 52.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 68.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.7507, + "eval_rouge1_for_task1152_bard_word_analogy": 27.0, + "eval_rouge1_for_task1153_bard_word_analogy": 40.0, + "eval_rouge1_for_task1154_bard_word_analogy": 26.0, + "eval_rouge1_for_task1155_bard_word_analogy": 77.0, + "eval_rouge1_for_task1156_bard_word_analogy": 59.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 63.0, + "eval_rouge1_for_task1158_bard_word_analogy": 52.0, + "eval_rouge1_for_task1159_bard_word_analogy": 37.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.5347, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.0698, + "eval_rouge1_for_task121_atomic_question_rewriting": 49.8285, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.5418, + "eval_rouge1_for_task1344_rte_textual_entailment": 67.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.652, + "eval_rouge1_for_task1356_xlsum_title_generation": 28.3531, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.7117, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 56.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 81.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.0857, + "eval_rouge1_for_task1407_dart_data_to_text": 24.5399, + "eval_rouge1_for_task1409_dart_data_to_text": 50.5753, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 56.6508, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 45.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 43.5161, + "eval_rouge1_for_task1554_scitail_textual_entailment": 67.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.158, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.2578, + "eval_rouge1_for_task1586_scifact_title_generation": 43.1572, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.9724, + "eval_rouge1_for_task1612_sick_textual_entailment": 53.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.0896, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.6749, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 78.0, + "eval_rouge1_for_task1659_billsum_title_generation": 37.3231, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 74.1429, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 68.5699, + "eval_rouge1_for_task190_snli_textual_entailment": 8.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 90.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 71.0, + "eval_rouge1_for_task219_rocstories_title_generation": 21.2085, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 96.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 60.0333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 16.3998, + "eval_rouge1_for_task288_gigaword_title_generation": 34.175, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 58.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 72.3429, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.1457, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 74.3518, + "eval_rouge1_for_task418_persent_title_generation": 33.623, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.6693, + "eval_rouge1_for_task500_scruples_title_generation": 25.3214, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.5112, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 89.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.3788, + "eval_rouge1_for_task602_wikitext_title_generation": 15.9984, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.4667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 50.3633, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.2241, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.9333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 68.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9238, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.1333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.2134, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.4777, + "eval_rouge1_for_task677_ollie_data_to_text": 30.3397, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rouge1_for_task743_eurlex_title_generation": 39.2113, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.9086, + "eval_rouge1_for_task769_qed_title_generation": 89.6629, + "eval_rouge1_for_task827_copa_cause_effect_classification": 89.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 69.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 52.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 66.9333, + "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 64.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.8698, + "eval_rouge1_for_task970_sherliic_textual_entailment": 75.0, + "eval_rouge1_for_textual_entailment": 52.625, + "eval_rouge1_for_title_generation": 41.5298, + "eval_rouge1_for_word_analogy": 47.7083, + "eval_rougeL": 53.9418, + "eval_rougeL_for_answerability_classification": 65.641, + "eval_rougeL_for_cause_effect_classification": 69.6532, + "eval_rougeL_for_coreference_resolution": 48.5418, + "eval_rougeL_for_data_to_text": 45.6967, + "eval_rougeL_for_dialogue_act_recognition": 61.0122, + "eval_rougeL_for_grammar_error_correction": 70.3271, + "eval_rougeL_for_keyword_tagging": 64.6611, + "eval_rougeL_for_overlap_extraction": 34.2539, + "eval_rougeL_for_question_rewriting": 67.404, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 62.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.9949, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.5342, + "eval_rougeL_for_task036_qasc_keyword_tagging": 79.1317, + "eval_rougeL_for_task039_qasc_overlap_extraction": 52.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 68.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.1508, + "eval_rougeL_for_task1152_bard_word_analogy": 27.0, + "eval_rougeL_for_task1153_bard_word_analogy": 40.0, + "eval_rougeL_for_task1154_bard_word_analogy": 26.0, + "eval_rougeL_for_task1155_bard_word_analogy": 77.0, + "eval_rougeL_for_task1156_bard_word_analogy": 59.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 63.0, + "eval_rougeL_for_task1158_bard_word_analogy": 52.0, + "eval_rougeL_for_task1159_bard_word_analogy": 37.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.1473, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.4169, + "eval_rougeL_for_task121_atomic_question_rewriting": 43.9282, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6105, + "eval_rougeL_for_task1344_rte_textual_entailment": 67.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.769, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.772, + "eval_rougeL_for_task1358_xlsum_title_generation": 34.8033, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 56.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 81.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.0857, + "eval_rougeL_for_task1407_dart_data_to_text": 19.6133, + "eval_rougeL_for_task1409_dart_data_to_text": 42.7796, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.4176, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 45.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 39.9862, + "eval_rougeL_for_task1554_scitail_textual_entailment": 67.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.2366, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.6638, + "eval_rougeL_for_task1586_scifact_title_generation": 36.7045, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.84, + "eval_rougeL_for_task1612_sick_textual_entailment": 53.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.5018, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 65.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 87.0616, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 78.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.5371, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 74.1429, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 59.472, + "eval_rougeL_for_task190_snli_textual_entailment": 8.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 90.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 71.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.2085, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 96.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 60.0333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 16.1745, + "eval_rougeL_for_task288_gigaword_title_generation": 29.6762, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 58.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 72.3429, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.367, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.0545, + "eval_rougeL_for_task418_persent_title_generation": 30.203, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2847, + "eval_rougeL_for_task500_scruples_title_generation": 24.3047, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.1692, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 89.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.7701, + "eval_rougeL_for_task602_wikitext_title_generation": 15.9984, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.4667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 48.5391, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.7169, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.7833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 68.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9238, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.1333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.3223, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.9739, + "eval_rougeL_for_task677_ollie_data_to_text": 24.7007, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.1434, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.387, + "eval_rougeL_for_task769_qed_title_generation": 89.6629, + "eval_rougeL_for_task827_copa_cause_effect_classification": 89.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 69.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 52.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 66.9333, + "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 64.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.4362, + "eval_rougeL_for_task970_sherliic_textual_entailment": 75.0, + "eval_rougeL_for_textual_entailment": 52.625, + "eval_rougeL_for_title_generation": 38.1645, + "eval_rougeL_for_word_analogy": 47.7083, + "eval_runtime": 788.0222, + "eval_samples_per_second": 15.114, + "eval_steps_per_second": 0.945, + "step": 5000 + }, + { + "epoch": 1.09, + "step": 5000, + "total_flos": 4.731573510382551e+17, + "train_loss": 0.9779035568237304, + "train_runtime": 33930.9714, + "train_samples_per_second": 2.358, + "train_steps_per_second": 0.147 + } + ], + "max_steps": 5000, + "num_train_epochs": 2, + "total_flos": 4.731573510382551e+17, + "trial_name": null, + "trial_params": null +}