{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0926472194908774, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 5.8716, "step": 1 }, { "epoch": 0.0, "eval_exact_match": 11.1755, "eval_exact_match_for_answerability_classification": 9.1538, "eval_exact_match_for_cause_effect_classification": 2.1429, "eval_exact_match_for_coreference_resolution": 10.0, "eval_exact_match_for_data_to_text": 1.6949, "eval_exact_match_for_dialogue_act_recognition": 27.5714, "eval_exact_match_for_grammar_error_correction": 4.0, "eval_exact_match_for_keyword_tagging": 12.4, "eval_exact_match_for_overlap_extraction": 1.0, "eval_exact_match_for_question_rewriting": 1.0, "eval_exact_match_for_task020_mctaco_answerability_classification": 9.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 6.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 1.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 2.0, "eval_exact_match_for_task050_multirc_answerability_classification": 0.0, "eval_exact_match_for_task102_commongen_data_to_text": 4.0, "eval_exact_match_for_task1152_bard_word_analogy": 6.0, "eval_exact_match_for_task1153_bard_word_analogy": 2.0, "eval_exact_match_for_task1154_bard_word_analogy": 0.0, "eval_exact_match_for_task1155_bard_word_analogy": 0.0, "eval_exact_match_for_task1156_bard_word_analogy": 5.0, "eval_exact_match_for_task1157_bard_word_analogy": 0.0, "eval_exact_match_for_task1158_bard_word_analogy": 0.0, "eval_exact_match_for_task1159_bard_word_analogy": 6.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 9.0, "eval_exact_match_for_task1386_anli_textual_entailment": 23.0, "eval_exact_match_for_task1387_anli_textual_entailment": 21.0, "eval_exact_match_for_task1388_cb_textual_entailment": 5.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 9.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 25.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 11.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 42.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 8.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 29.0, "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 27.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 5.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 6.0, "eval_exact_match_for_task1659_billsum_title_generation": 0.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 1.0, "eval_exact_match_for_task199_multinli_textual_entailment": 42.0, "eval_exact_match_for_task200_multinli_textual_entailment": 19.0, "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, "eval_exact_match_for_task219_rocstories_title_generation": 5.0, "eval_exact_match_for_task220_rocstories_title_generation": 69.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 21.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 10.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 0.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 19.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 6.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 28.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, "eval_exact_match_for_task602_wikitext_title_generation": 0.0, "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 44.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 3.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 56.0, "eval_exact_match_for_task743_eurlex_title_generation": 0.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 26.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 0.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, "eval_exact_match_for_task891_gap_coreference_resolution": 19.0, "eval_exact_match_for_task892_gap_coreference_resolution": 34.0, "eval_exact_match_for_task893_gap_coreference_resolution": 13.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 45.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, "eval_exact_match_for_textual_entailment": 26.7083, "eval_exact_match_for_title_generation": 5.9978, "eval_exact_match_for_word_analogy": 2.375, "eval_f1": 27.0871, "eval_f1_for_answerability_classification": 10.3639, "eval_f1_for_cause_effect_classification": 22.5808, "eval_f1_for_coreference_resolution": 17.9603, "eval_f1_for_data_to_text": 48.128, "eval_f1_for_dialogue_act_recognition": 31.5121, "eval_f1_for_grammar_error_correction": 53.8723, "eval_f1_for_keyword_tagging": 26.9887, "eval_f1_for_overlap_extraction": 35.5273, "eval_f1_for_question_rewriting": 57.3275, "eval_f1_for_task020_mctaco_answerability_classification": 11.2336, "eval_f1_for_task033_winogrande_coreference_resolution": 6.0, "eval_f1_for_task034_winogrande_question_rewriting": 47.2813, "eval_f1_for_task035_winogrande_question_rewriting": 67.1275, "eval_f1_for_task036_qasc_keyword_tagging": 44.9834, "eval_f1_for_task039_qasc_overlap_extraction": 26.6667, "eval_f1_for_task050_multirc_answerability_classification": 0.4293, "eval_f1_for_task102_commongen_data_to_text": 61.7356, "eval_f1_for_task1152_bard_word_analogy": 6.0, "eval_f1_for_task1153_bard_word_analogy": 2.0, "eval_f1_for_task1154_bard_word_analogy": 0.0, "eval_f1_for_task1155_bard_word_analogy": 0.0, "eval_f1_for_task1156_bard_word_analogy": 5.0, "eval_f1_for_task1157_bard_word_analogy": 0.0, "eval_f1_for_task1158_bard_word_analogy": 0.0, "eval_f1_for_task1159_bard_word_analogy": 6.0, "eval_f1_for_task1161_coda_19_title_generation": 27.0316, "eval_f1_for_task1195_disfl_qa_question_rewriting": 72.279, "eval_f1_for_task121_atomic_question_rewriting": 43.4431, "eval_f1_for_task133_winowhy_coreference_resolution": 2.516, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.0422, "eval_f1_for_task1344_rte_textual_entailment": 49.0, "eval_f1_for_task1345_qqp_question_rewriting": 29.4827, "eval_f1_for_task1356_xlsum_title_generation": 10.9638, "eval_f1_for_task1358_xlsum_title_generation": 30.55, "eval_f1_for_task1385_anli_textual_entailment": 26.3333, "eval_f1_for_task1386_anli_textual_entailment": 28.3333, "eval_f1_for_task1387_anli_textual_entailment": 34.3333, "eval_f1_for_task1388_cb_textual_entailment": 17.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 9.3333, "eval_f1_for_task1391_winogrande_coreference_resolution": 0.0, "eval_f1_for_task1393_copa_cause_effect_classification": 13.2132, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.3635, "eval_f1_for_task1407_dart_data_to_text": 39.5949, "eval_f1_for_task1409_dart_data_to_text": 49.6952, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6865, "eval_f1_for_task1439_doqa_answerability_classification": 0.7747, "eval_f1_for_task1442_doqa_answerability_classification": 1.2415, "eval_f1_for_task1516_imppres_textual_entailment": 25.2597, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 12.3333, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 42.092, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 6.9134, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 78.0581, "eval_f1_for_task1562_zest_question_rewriting": 56.7024, "eval_f1_for_task1586_scifact_title_generation": 27.0855, "eval_f1_for_task1598_nyc_data_to_text": 50.8643, "eval_f1_for_task1612_sick_textual_entailment": 29.4683, "eval_f1_for_task1615_sick_textual_entailment": 33.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 74.2143, "eval_f1_for_task1624_disfl_qa_answerability_classification": 27.5376, "eval_f1_for_task1631_open_pi_data_to_text": 59.8987, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 6.6321, "eval_f1_for_task1659_billsum_title_generation": 15.619, "eval_f1_for_task1664_wino_bias_coreference_resolution": 42.869, "eval_f1_for_task1728_web_nlg_data_to_text": 56.7336, "eval_f1_for_task190_snli_textual_entailment": 1.0, "eval_f1_for_task199_multinli_textual_entailment": 42.0, "eval_f1_for_task200_multinli_textual_entailment": 19.0, "eval_f1_for_task201_multinli_textual_entailment": 33.091, "eval_f1_for_task202_multinli_textual_entailment": 33.0, "eval_f1_for_task219_rocstories_title_generation": 15.9372, "eval_f1_for_task220_rocstories_title_generation": 69.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 1.204, "eval_f1_for_task232_iirc_answerability_classification": 3.0809, "eval_f1_for_task233_iirc_answerability_classification": 1.5216, "eval_f1_for_task242_tweetqa_answerability_classification": 48.6667, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 33.0863, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.3879, "eval_f1_for_task288_gigaword_title_generation": 28.1833, "eval_f1_for_task290_tellmewhy_answerability_classification": 3.3852, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 6.7603, "eval_f1_for_task329_gap_coreference_resolution": 33.0858, "eval_f1_for_task330_gap_coreference_resolution": 12.4557, "eval_f1_for_task349_squad2.0_answerability_classification": 0.1806, "eval_f1_for_task362_spolin_dialogue_act_recognition": 43.1081, "eval_f1_for_task391_cod3s_cause_effect_classification": 44.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 2.38, "eval_f1_for_task393_cod3s_cause_effect_classification": 25.0539, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.187, "eval_f1_for_task402_grailqa_question_rewriting": 36.4946, "eval_f1_for_task418_persent_title_generation": 14.0591, "eval_f1_for_task442_com_qa_question_rewriting": 58.488, "eval_f1_for_task500_scruples_title_generation": 12.9241, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 18.9861, "eval_f1_for_task520_aquamuse_answerability_classification": 28.8428, "eval_f1_for_task569_recipe_nlg_title_generation": 13.6453, "eval_f1_for_task602_wikitext_title_generation": 5.7439, "eval_f1_for_task613_liar_keyword_tagging": 19.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 41.6164, "eval_f1_for_task619_ohsumed_title_generation": 21.4605, "eval_f1_for_task620_ohsumed_keyword_tagging": 3.6862, "eval_f1_for_task623_ohsumed_keyword_tagging": 1.5811, "eval_f1_for_task640_e_snli_textual_entailment": 37.0, "eval_f1_for_task641_e_snli_textual_entailment": 0.0, "eval_f1_for_task642_e_snli_textual_entailment": 18.6667, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 65.0262, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 29.0474, "eval_f1_for_task670_ambigqa_question_rewriting": 78.4861, "eval_f1_for_task671_ambigqa_question_rewriting": 66.6034, "eval_f1_for_task677_ollie_data_to_text": 36.0797, "eval_f1_for_task738_perspectrum_textual_entailment": 56.6667, "eval_f1_for_task743_eurlex_title_generation": 20.7992, "eval_f1_for_task760_msr_sqa_data_to_text": 7.9571, "eval_f1_for_task769_qed_title_generation": 36.352, "eval_f1_for_task827_copa_cause_effect_classification": 0.069, "eval_f1_for_task828_copa_cause_effect_classification": 31.7333, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.4989, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.1888, "eval_f1_for_task890_gwsd_textual_entailment": 33.0, "eval_f1_for_task891_gap_coreference_resolution": 24.5854, "eval_f1_for_task892_gap_coreference_resolution": 34.1894, "eval_f1_for_task893_gap_coreference_resolution": 13.3291, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 5.766, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 45.0664, "eval_f1_for_task957_e2e_data_to_text": 40.8664, "eval_f1_for_task970_sherliic_textual_entailment": 0.0, "eval_f1_for_textual_entailment": 29.791, "eval_f1_for_title_generation": 21.6019, "eval_f1_for_word_analogy": 2.375, "eval_gen_len": 37.0505, "eval_global_step": 1, "eval_loss": 5.291327953338623, "eval_rouge1": 29.1206, "eval_rouge1_for_answerability_classification": 10.3389, "eval_rouge1_for_cause_effect_classification": 28.0063, "eval_rouge1_for_coreference_resolution": 18.3029, "eval_rouge1_for_data_to_text": 50.9172, "eval_rouge1_for_dialogue_act_recognition": 32.5851, "eval_rouge1_for_grammar_error_correction": 58.8557, "eval_rouge1_for_keyword_tagging": 31.5336, "eval_rouge1_for_overlap_extraction": 36.3246, "eval_rouge1_for_question_rewriting": 58.8996, "eval_rouge1_for_task020_mctaco_answerability_classification": 11.1774, "eval_rouge1_for_task033_winogrande_coreference_resolution": 6.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 47.3041, "eval_rouge1_for_task035_winogrande_question_rewriting": 67.857, "eval_rouge1_for_task036_qasc_keyword_tagging": 52.4004, "eval_rouge1_for_task039_qasc_overlap_extraction": 27.5667, "eval_rouge1_for_task050_multirc_answerability_classification": 0.4172, "eval_rouge1_for_task102_commongen_data_to_text": 72.8784, "eval_rouge1_for_task1152_bard_word_analogy": 6.0, "eval_rouge1_for_task1153_bard_word_analogy": 2.0, "eval_rouge1_for_task1154_bard_word_analogy": 0.0, "eval_rouge1_for_task1155_bard_word_analogy": 0.0, "eval_rouge1_for_task1156_bard_word_analogy": 5.0, "eval_rouge1_for_task1157_bard_word_analogy": 0.0, "eval_rouge1_for_task1158_bard_word_analogy": 0.0, "eval_rouge1_for_task1159_bard_word_analogy": 6.0, "eval_rouge1_for_task1161_coda_19_title_generation": 30.3334, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 73.1844, "eval_rouge1_for_task121_atomic_question_rewriting": 45.3424, "eval_rouge1_for_task133_winowhy_coreference_resolution": 2.4723, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.7591, "eval_rouge1_for_task1344_rte_textual_entailment": 49.6667, "eval_rouge1_for_task1345_qqp_question_rewriting": 32.2276, "eval_rouge1_for_task1356_xlsum_title_generation": 13.3405, "eval_rouge1_for_task1358_xlsum_title_generation": 35.4996, "eval_rouge1_for_task1385_anli_textual_entailment": 26.3333, "eval_rouge1_for_task1386_anli_textual_entailment": 28.3333, "eval_rouge1_for_task1387_anli_textual_entailment": 34.3333, "eval_rouge1_for_task1388_cb_textual_entailment": 17.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 9.3333, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 0.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 13.2132, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 6.8968, "eval_rouge1_for_task1407_dart_data_to_text": 40.2637, "eval_rouge1_for_task1409_dart_data_to_text": 51.6783, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3109, "eval_rouge1_for_task1439_doqa_answerability_classification": 0.7315, "eval_rouge1_for_task1442_doqa_answerability_classification": 1.1868, "eval_rouge1_for_task1516_imppres_textual_entailment": 25.2438, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 12.3333, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 42.0843, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 7.9958, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 81.4005, "eval_rouge1_for_task1562_zest_question_rewriting": 59.3918, "eval_rouge1_for_task1586_scifact_title_generation": 29.5348, "eval_rouge1_for_task1598_nyc_data_to_text": 51.2268, "eval_rouge1_for_task1612_sick_textual_entailment": 29.4366, "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 75.687, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 27.5307, "eval_rouge1_for_task1631_open_pi_data_to_text": 60.5069, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 6.6211, "eval_rouge1_for_task1659_billsum_title_generation": 16.9264, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 42.869, "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.0359, "eval_rouge1_for_task190_snli_textual_entailment": 1.0, "eval_rouge1_for_task199_multinli_textual_entailment": 42.0, "eval_rouge1_for_task200_multinli_textual_entailment": 29.0, "eval_rouge1_for_task201_multinli_textual_entailment": 33.0813, "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, "eval_rouge1_for_task219_rocstories_title_generation": 19.3987, "eval_rouge1_for_task220_rocstories_title_generation": 69.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 1.1915, "eval_rouge1_for_task232_iirc_answerability_classification": 3.0453, "eval_rouge1_for_task233_iirc_answerability_classification": 1.5018, "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.6667, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 33.1409, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.0825, "eval_rouge1_for_task288_gigaword_title_generation": 30.8148, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 3.3574, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.7702, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0833, "eval_rouge1_for_task330_gap_coreference_resolution": 12.4842, "eval_rouge1_for_task349_squad2.0_answerability_classification": 0.1761, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 43.0952, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 44.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 2.38, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 25.2299, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 4.8867, "eval_rouge1_for_task402_grailqa_question_rewriting": 37.5779, "eval_rouge1_for_task418_persent_title_generation": 16.5055, "eval_rouge1_for_task442_com_qa_question_rewriting": 62.6952, "eval_rouge1_for_task500_scruples_title_generation": 14.2241, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 19.3933, "eval_rouge1_for_task520_aquamuse_answerability_classification": 28.8017, "eval_rouge1_for_task569_recipe_nlg_title_generation": 14.3747, "eval_rouge1_for_task602_wikitext_title_generation": 6.1492, "eval_rouge1_for_task613_liar_keyword_tagging": 32.9, "eval_rouge1_for_task614_glucose_cause_effect_classification": 46.0935, "eval_rouge1_for_task619_ohsumed_title_generation": 22.6941, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 4.1693, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 1.5579, "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 18.6667, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 66.6405, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 31.1018, "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.2801, "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.3485, "eval_rouge1_for_task677_ollie_data_to_text": 39.0409, "eval_rouge1_for_task738_perspectrum_textual_entailment": 56.6667, "eval_rouge1_for_task743_eurlex_title_generation": 21.938, "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.4717, "eval_rouge1_for_task769_qed_title_generation": 36.3898, "eval_rouge1_for_task827_copa_cause_effect_classification": 33.3939, "eval_rouge1_for_task828_copa_cause_effect_classification": 31.7333, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.4971, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.1891, "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, "eval_rouge1_for_task891_gap_coreference_resolution": 24.5854, "eval_rouge1_for_task892_gap_coreference_resolution": 34.1855, "eval_rouge1_for_task893_gap_coreference_resolution": 13.3274, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 48.1277, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 37.0659, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0631, "eval_rouge1_for_task957_e2e_data_to_text": 42.7422, "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, "eval_rouge1_for_textual_entailment": 33.4452, "eval_rouge1_for_title_generation": 23.2785, "eval_rouge1_for_word_analogy": 2.375, "eval_rougeL": 27.7393, "eval_rougeL_for_answerability_classification": 10.3389, "eval_rougeL_for_cause_effect_classification": 26.592, "eval_rougeL_for_coreference_resolution": 17.8994, "eval_rougeL_for_data_to_text": 43.8964, "eval_rougeL_for_dialogue_act_recognition": 32.5786, "eval_rougeL_for_grammar_error_correction": 57.8909, "eval_rougeL_for_keyword_tagging": 30.53, "eval_rougeL_for_overlap_extraction": 35.5881, "eval_rougeL_for_question_rewriting": 55.0234, "eval_rougeL_for_task020_mctaco_answerability_classification": 11.1774, "eval_rougeL_for_task033_winogrande_coreference_resolution": 6.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 45.5354, "eval_rougeL_for_task035_winogrande_question_rewriting": 63.5532, "eval_rougeL_for_task036_qasc_keyword_tagging": 49.8579, "eval_rougeL_for_task039_qasc_overlap_extraction": 27.5667, "eval_rougeL_for_task050_multirc_answerability_classification": 0.4172, "eval_rougeL_for_task102_commongen_data_to_text": 66.3654, "eval_rougeL_for_task1152_bard_word_analogy": 6.0, "eval_rougeL_for_task1153_bard_word_analogy": 2.0, "eval_rougeL_for_task1154_bard_word_analogy": 0.0, "eval_rougeL_for_task1155_bard_word_analogy": 0.0, "eval_rougeL_for_task1156_bard_word_analogy": 5.0, "eval_rougeL_for_task1157_bard_word_analogy": 0.0, "eval_rougeL_for_task1158_bard_word_analogy": 0.0, "eval_rougeL_for_task1159_bard_word_analogy": 6.0, "eval_rougeL_for_task1161_coda_19_title_generation": 24.6691, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 71.1701, "eval_rougeL_for_task121_atomic_question_rewriting": 40.7211, "eval_rougeL_for_task133_winowhy_coreference_resolution": 2.4723, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.8301, "eval_rougeL_for_task1344_rte_textual_entailment": 49.6667, "eval_rougeL_for_task1345_qqp_question_rewriting": 28.5457, "eval_rougeL_for_task1356_xlsum_title_generation": 11.2576, "eval_rougeL_for_task1358_xlsum_title_generation": 30.0572, "eval_rougeL_for_task1385_anli_textual_entailment": 26.3333, "eval_rougeL_for_task1386_anli_textual_entailment": 28.3333, "eval_rougeL_for_task1387_anli_textual_entailment": 34.3333, "eval_rougeL_for_task1388_cb_textual_entailment": 17.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 9.3333, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 0.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 13.2132, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 6.8511, "eval_rougeL_for_task1407_dart_data_to_text": 35.9165, "eval_rougeL_for_task1409_dart_data_to_text": 43.3984, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9628, "eval_rougeL_for_task1439_doqa_answerability_classification": 0.7315, "eval_rougeL_for_task1442_doqa_answerability_classification": 1.1868, "eval_rougeL_for_task1516_imppres_textual_entailment": 25.2438, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 12.3333, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 42.0843, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 6.6033, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 79.819, "eval_rougeL_for_task1562_zest_question_rewriting": 52.0702, "eval_rougeL_for_task1586_scifact_title_generation": 24.3587, "eval_rougeL_for_task1598_nyc_data_to_text": 41.3066, "eval_rougeL_for_task1612_sick_textual_entailment": 29.4366, "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 73.9174, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 27.5307, "eval_rougeL_for_task1631_open_pi_data_to_text": 58.5352, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 6.6211, "eval_rougeL_for_task1659_billsum_title_generation": 13.3259, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 38.3452, "eval_rougeL_for_task1728_web_nlg_data_to_text": 50.6113, "eval_rougeL_for_task190_snli_textual_entailment": 1.0, "eval_rougeL_for_task199_multinli_textual_entailment": 42.0, "eval_rougeL_for_task200_multinli_textual_entailment": 29.0, "eval_rougeL_for_task201_multinli_textual_entailment": 33.0813, "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, "eval_rougeL_for_task219_rocstories_title_generation": 19.3767, "eval_rougeL_for_task220_rocstories_title_generation": 69.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 1.1915, "eval_rougeL_for_task232_iirc_answerability_classification": 3.0453, "eval_rougeL_for_task233_iirc_answerability_classification": 1.5018, "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.6667, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 32.1885, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 43.6095, "eval_rougeL_for_task288_gigaword_title_generation": 26.5353, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 3.3574, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.7435, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0833, "eval_rougeL_for_task330_gap_coreference_resolution": 12.4842, "eval_rougeL_for_task349_squad2.0_answerability_classification": 0.1761, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 43.0952, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 44.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 2.38, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 23.1146, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 4.8867, "eval_rougeL_for_task402_grailqa_question_rewriting": 31.8321, "eval_rougeL_for_task418_persent_title_generation": 15.0422, "eval_rougeL_for_task442_com_qa_question_rewriting": 53.8628, "eval_rougeL_for_task500_scruples_title_generation": 12.8831, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 18.8608, "eval_rougeL_for_task520_aquamuse_answerability_classification": 28.8017, "eval_rougeL_for_task569_recipe_nlg_title_generation": 13.5607, "eval_rougeL_for_task602_wikitext_title_generation": 6.0645, "eval_rougeL_for_task613_liar_keyword_tagging": 32.9, "eval_rougeL_for_task614_glucose_cause_effect_classification": 38.3087, "eval_rougeL_for_task619_ohsumed_title_generation": 19.0442, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 3.8938, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 1.5579, "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 18.6667, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 64.4405, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 30.9557, "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.8817, "eval_rougeL_for_task671_ambigqa_question_rewriting": 66.1674, "eval_rougeL_for_task677_ollie_data_to_text": 32.3323, "eval_rougeL_for_task738_perspectrum_textual_entailment": 56.6667, "eval_rougeL_for_task743_eurlex_title_generation": 18.6084, "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.392, "eval_rougeL_for_task769_qed_title_generation": 36.3221, "eval_rougeL_for_task827_copa_cause_effect_classification": 33.3939, "eval_rougeL_for_task828_copa_cause_effect_classification": 31.7333, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.4971, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.1891, "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, "eval_rougeL_for_task891_gap_coreference_resolution": 24.5854, "eval_rougeL_for_task892_gap_coreference_resolution": 34.1855, "eval_rougeL_for_task893_gap_coreference_resolution": 13.3274, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 48.1277, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 37.0659, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 46.0631, "eval_rougeL_for_task957_e2e_data_to_text": 32.1967, "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, "eval_rougeL_for_textual_entailment": 33.4452, "eval_rougeL_for_title_generation": 21.0443, "eval_rougeL_for_word_analogy": 2.375, "eval_runtime": 2925.679, "eval_samples_per_second": 4.071, "eval_steps_per_second": 0.255, "step": 1 }, { "epoch": 0.01, "learning_rate": 5e-05, "loss": 1.7795, "step": 50 }, { "epoch": 0.01, "eval_exact_match": 28.8245, "eval_exact_match_for_answerability_classification": 49.8462, "eval_exact_match_for_cause_effect_classification": 35.1429, "eval_exact_match_for_coreference_resolution": 36.7857, "eval_exact_match_for_data_to_text": 7.8692, "eval_exact_match_for_dialogue_act_recognition": 46.7143, "eval_exact_match_for_grammar_error_correction": 9.0, "eval_exact_match_for_keyword_tagging": 34.2, "eval_exact_match_for_overlap_extraction": 12.5, "eval_exact_match_for_question_rewriting": 1.9091, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 43.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 10.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 38.0, "eval_exact_match_for_task1153_bard_word_analogy": 31.0, "eval_exact_match_for_task1154_bard_word_analogy": 31.0, "eval_exact_match_for_task1155_bard_word_analogy": 48.0, "eval_exact_match_for_task1156_bard_word_analogy": 50.0, "eval_exact_match_for_task1157_bard_word_analogy": 45.0, "eval_exact_match_for_task1158_bard_word_analogy": 39.0, "eval_exact_match_for_task1159_bard_word_analogy": 23.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 30.0, "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 16.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 46.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 63.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 32.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 18.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 60.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 16.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 31.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, "eval_exact_match_for_task219_rocstories_title_generation": 10.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 45.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 25.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 55.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 7.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 9.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 25.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 32.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 39.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 66.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 17.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 56.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, "eval_exact_match_for_task892_gap_coreference_resolution": 45.0, "eval_exact_match_for_task893_gap_coreference_resolution": 26.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 38.875, "eval_exact_match_for_title_generation": 8.9126, "eval_exact_match_for_word_analogy": 38.125, "eval_f1": 45.8773, "eval_f1_for_answerability_classification": 52.4103, "eval_f1_for_cause_effect_classification": 55.243, "eval_f1_for_coreference_resolution": 46.8827, "eval_f1_for_data_to_text": 53.5908, "eval_f1_for_dialogue_act_recognition": 50.2857, "eval_f1_for_grammar_error_correction": 57.6091, "eval_f1_for_keyword_tagging": 49.0784, "eval_f1_for_overlap_extraction": 41.7877, "eval_f1_for_question_rewriting": 63.4065, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 43.0, "eval_f1_for_task034_winogrande_question_rewriting": 22.4226, "eval_f1_for_task035_winogrande_question_rewriting": 84.4994, "eval_f1_for_task036_qasc_keyword_tagging": 56.1589, "eval_f1_for_task039_qasc_overlap_extraction": 30.0, "eval_f1_for_task050_multirc_answerability_classification": 50.0, "eval_f1_for_task102_commongen_data_to_text": 55.9256, "eval_f1_for_task1152_bard_word_analogy": 38.0, "eval_f1_for_task1153_bard_word_analogy": 31.0, "eval_f1_for_task1154_bard_word_analogy": 31.0, "eval_f1_for_task1155_bard_word_analogy": 48.0, "eval_f1_for_task1156_bard_word_analogy": 50.0, "eval_f1_for_task1157_bard_word_analogy": 45.0, "eval_f1_for_task1158_bard_word_analogy": 39.0, "eval_f1_for_task1159_bard_word_analogy": 23.0, "eval_f1_for_task1161_coda_19_title_generation": 27.36, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7283, "eval_f1_for_task121_atomic_question_rewriting": 49.3193, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 4.0677, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.3473, "eval_f1_for_task1356_xlsum_title_generation": 14.117, "eval_f1_for_task1358_xlsum_title_generation": 32.2752, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 30.0, "eval_f1_for_task1387_anli_textual_entailment": 33.0, "eval_f1_for_task1388_cb_textual_entailment": 20.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 16.0, "eval_f1_for_task1393_copa_cause_effect_classification": 46.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 63.0, "eval_f1_for_task1407_dart_data_to_text": 40.4575, "eval_f1_for_task1409_dart_data_to_text": 54.5805, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1366, "eval_f1_for_task1439_doqa_answerability_classification": 47.0, "eval_f1_for_task1442_doqa_answerability_classification": 57.0, "eval_f1_for_task1516_imppres_textual_entailment": 32.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 30.5024, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 85.0815, "eval_f1_for_task1562_zest_question_rewriting": 55.1795, "eval_f1_for_task1586_scifact_title_generation": 29.6129, "eval_f1_for_task1598_nyc_data_to_text": 49.205, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 33.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.8462, "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_f1_for_task1631_open_pi_data_to_text": 96.0011, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 42.1185, "eval_f1_for_task1664_wino_bias_coreference_resolution": 72.0476, "eval_f1_for_task1728_web_nlg_data_to_text": 58.5367, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 34.0, "eval_f1_for_task201_multinli_textual_entailment": 34.0, "eval_f1_for_task202_multinli_textual_entailment": 33.0, "eval_f1_for_task219_rocstories_title_generation": 24.8452, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_f1_for_task232_iirc_answerability_classification": 47.0, "eval_f1_for_task233_iirc_answerability_classification": 50.0, "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.5048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 53.5754, "eval_f1_for_task288_gigaword_title_generation": 31.44, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 60.7, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 61.3524, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 33.0203, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.0, "eval_f1_for_task402_grailqa_question_rewriting": 82.6095, "eval_f1_for_task418_persent_title_generation": 26.2959, "eval_f1_for_task442_com_qa_question_rewriting": 69.9745, "eval_f1_for_task500_scruples_title_generation": 19.3028, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 44.9958, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 43.6637, "eval_f1_for_task602_wikitext_title_generation": 13.786, "eval_f1_for_task613_liar_keyword_tagging": 21.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 41.0144, "eval_f1_for_task619_ohsumed_title_generation": 36.9998, "eval_f1_for_task620_ohsumed_keyword_tagging": 45.5, "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_f1_for_task640_e_snli_textual_entailment": 32.0, "eval_f1_for_task641_e_snli_textual_entailment": 39.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 72.4, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 54.553, "eval_f1_for_task670_ambigqa_question_rewriting": 73.6919, "eval_f1_for_task671_ambigqa_question_rewriting": 63.8535, "eval_f1_for_task677_ollie_data_to_text": 35.0121, "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, "eval_f1_for_task743_eurlex_title_generation": 30.4233, "eval_f1_for_task760_msr_sqa_data_to_text": 4.4007, "eval_f1_for_task769_qed_title_generation": 65.6746, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 50.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, "eval_f1_for_task890_gwsd_textual_entailment": 34.0, "eval_f1_for_task891_gap_coreference_resolution": 56.2, "eval_f1_for_task892_gap_coreference_resolution": 45.0, "eval_f1_for_task893_gap_coreference_resolution": 26.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_f1_for_task957_e2e_data_to_text": 51.7969, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 38.875, "eval_f1_for_title_generation": 31.6858, "eval_f1_for_word_analogy": 38.125, "eval_gen_len": 9.6505, "eval_global_step": 50, "eval_loss": 1.0859836339950562, "eval_rouge1": 47.7651, "eval_rouge1_for_answerability_classification": 52.4103, "eval_rouge1_for_cause_effect_classification": 56.2252, "eval_rouge1_for_coreference_resolution": 48.4493, "eval_rouge1_for_data_to_text": 56.2778, "eval_rouge1_for_dialogue_act_recognition": 52.6095, "eval_rouge1_for_grammar_error_correction": 62.6525, "eval_rouge1_for_keyword_tagging": 55.0872, "eval_rouge1_for_overlap_extraction": 44.4977, "eval_rouge1_for_question_rewriting": 65.0636, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 46.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 22.3852, "eval_rouge1_for_task035_winogrande_question_rewriting": 85.4302, "eval_rouge1_for_task036_qasc_keyword_tagging": 66.4027, "eval_rouge1_for_task039_qasc_overlap_extraction": 34.5, "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.0714, "eval_rouge1_for_task1152_bard_word_analogy": 38.0, "eval_rouge1_for_task1153_bard_word_analogy": 31.0, "eval_rouge1_for_task1154_bard_word_analogy": 31.0, "eval_rouge1_for_task1155_bard_word_analogy": 48.0, "eval_rouge1_for_task1156_bard_word_analogy": 50.0, "eval_rouge1_for_task1157_bard_word_analogy": 45.0, "eval_rouge1_for_task1158_bard_word_analogy": 39.0, "eval_rouge1_for_task1159_bard_word_analogy": 23.0, "eval_rouge1_for_task1161_coda_19_title_generation": 30.4162, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.5143, "eval_rouge1_for_task121_atomic_question_rewriting": 51.9403, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 5.0587, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.6476, "eval_rouge1_for_task1356_xlsum_title_generation": 16.7345, "eval_rouge1_for_task1358_xlsum_title_generation": 37.3382, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 30.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 16.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 46.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 66.6, "eval_rouge1_for_task1407_dart_data_to_text": 41.4469, "eval_rouge1_for_task1409_dart_data_to_text": 55.3365, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8846, "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 32.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 33.5822, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 88.4204, "eval_rouge1_for_task1562_zest_question_rewriting": 57.9884, "eval_rouge1_for_task1586_scifact_title_generation": 33.0182, "eval_rouge1_for_task1598_nyc_data_to_text": 49.891, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2917, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 96.0212, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 43.1233, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 72.0476, "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.2014, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, "eval_rouge1_for_task219_rocstories_title_generation": 33.2095, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.35, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 54.4954, "eval_rouge1_for_task288_gigaword_title_generation": 34.1184, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 64.5333, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 61.8524, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.8388, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 44.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.6352, "eval_rouge1_for_task418_persent_title_generation": 30.1832, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.7861, "eval_rouge1_for_task500_scruples_title_generation": 21.3318, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 45.4941, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.8698, "eval_rouge1_for_task602_wikitext_title_generation": 14.2789, "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 47.0709, "eval_rouge1_for_task619_ohsumed_title_generation": 40.0765, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.8, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 32.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 39.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 72.4, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 54.6398, "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.4893, "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.5915, "eval_rouge1_for_task677_ollie_data_to_text": 37.5922, "eval_rouge1_for_task738_perspectrum_textual_entailment": 56.0, "eval_rouge1_for_task743_eurlex_title_generation": 32.1168, "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.7658, "eval_rouge1_for_task769_qed_title_generation": 65.7329, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, "eval_rouge1_for_task891_gap_coreference_resolution": 56.2, "eval_rouge1_for_task892_gap_coreference_resolution": 45.0, "eval_rouge1_for_task893_gap_coreference_resolution": 26.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rouge1_for_task957_e2e_data_to_text": 54.0547, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 40.9861, "eval_rouge1_for_title_generation": 34.1031, "eval_rouge1_for_word_analogy": 38.125, "eval_rougeL": 46.3793, "eval_rougeL_for_answerability_classification": 52.4103, "eval_rougeL_for_cause_effect_classification": 55.3664, "eval_rougeL_for_coreference_resolution": 48.4255, "eval_rougeL_for_data_to_text": 48.4025, "eval_rougeL_for_dialogue_act_recognition": 52.6095, "eval_rougeL_for_grammar_error_correction": 62.0352, "eval_rougeL_for_keyword_tagging": 54.3437, "eval_rougeL_for_overlap_extraction": 43.4147, "eval_rougeL_for_question_rewriting": 61.2412, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 46.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 21.9629, "eval_rougeL_for_task035_winogrande_question_rewriting": 84.1953, "eval_rougeL_for_task036_qasc_keyword_tagging": 64.4186, "eval_rougeL_for_task039_qasc_overlap_extraction": 34.5, "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, "eval_rougeL_for_task102_commongen_data_to_text": 60.1824, "eval_rougeL_for_task1152_bard_word_analogy": 38.0, "eval_rougeL_for_task1153_bard_word_analogy": 31.0, "eval_rougeL_for_task1154_bard_word_analogy": 31.0, "eval_rougeL_for_task1155_bard_word_analogy": 48.0, "eval_rougeL_for_task1156_bard_word_analogy": 50.0, "eval_rougeL_for_task1157_bard_word_analogy": 45.0, "eval_rougeL_for_task1158_bard_word_analogy": 39.0, "eval_rougeL_for_task1159_bard_word_analogy": 23.0, "eval_rougeL_for_task1161_coda_19_title_generation": 24.1707, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9398, "eval_rougeL_for_task121_atomic_question_rewriting": 46.6718, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 4.9872, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.5578, "eval_rougeL_for_task1356_xlsum_title_generation": 14.2242, "eval_rougeL_for_task1358_xlsum_title_generation": 31.155, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 30.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 16.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 46.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 66.6, "eval_rougeL_for_task1407_dart_data_to_text": 33.3412, "eval_rougeL_for_task1409_dart_data_to_text": 45.4825, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5558, "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 32.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 31.5569, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.5146, "eval_rougeL_for_task1562_zest_question_rewriting": 51.1022, "eval_rougeL_for_task1586_scifact_title_generation": 26.4155, "eval_rougeL_for_task1598_nyc_data_to_text": 39.8804, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.0127, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.6945, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 39.8852, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 72.0476, "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.44, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, "eval_rougeL_for_task219_rocstories_title_generation": 33.2095, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.35, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 52.3294, "eval_rougeL_for_task288_gigaword_title_generation": 30.1322, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 64.5333, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 61.8524, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.2022, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 44.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 70.9514, "eval_rougeL_for_task418_persent_title_generation": 26.8869, "eval_rougeL_for_task442_com_qa_question_rewriting": 68.6037, "eval_rougeL_for_task500_scruples_title_generation": 19.7373, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 44.9387, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.6247, "eval_rougeL_for_task602_wikitext_title_generation": 14.2271, "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 42.6963, "eval_rougeL_for_task619_ohsumed_title_generation": 36.4204, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 48.0667, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 32.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 39.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 72.4, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 54.3064, "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.5495, "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.1059, "eval_rougeL_for_task677_ollie_data_to_text": 30.2559, "eval_rougeL_for_task738_perspectrum_textual_entailment": 56.0, "eval_rougeL_for_task743_eurlex_title_generation": 28.8844, "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.2031, "eval_rougeL_for_task769_qed_title_generation": 65.7329, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, "eval_rougeL_for_task891_gap_coreference_resolution": 56.2, "eval_rougeL_for_task892_gap_coreference_resolution": 45.0, "eval_rougeL_for_task893_gap_coreference_resolution": 26.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rougeL_for_task957_e2e_data_to_text": 41.4348, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 40.9861, "eval_rougeL_for_title_generation": 31.6094, "eval_rougeL_for_word_analogy": 38.125, "eval_runtime": 1011.8388, "eval_samples_per_second": 11.771, "eval_steps_per_second": 0.736, "step": 50 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 1.3041, "step": 100 }, { "epoch": 0.02, "eval_exact_match": 29.2611, "eval_exact_match_for_answerability_classification": 49.3846, "eval_exact_match_for_cause_effect_classification": 36.5714, "eval_exact_match_for_coreference_resolution": 36.2143, "eval_exact_match_for_data_to_text": 7.8692, "eval_exact_match_for_dialogue_act_recognition": 47.4286, "eval_exact_match_for_grammar_error_correction": 8.5, "eval_exact_match_for_keyword_tagging": 34.8, "eval_exact_match_for_overlap_extraction": 11.0, "eval_exact_match_for_question_rewriting": 2.2727, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 41.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 34.0, "eval_exact_match_for_task1153_bard_word_analogy": 28.0, "eval_exact_match_for_task1154_bard_word_analogy": 27.0, "eval_exact_match_for_task1155_bard_word_analogy": 55.0, "eval_exact_match_for_task1156_bard_word_analogy": 47.0, "eval_exact_match_for_task1157_bard_word_analogy": 51.0, "eval_exact_match_for_task1158_bard_word_analogy": 49.0, "eval_exact_match_for_task1159_bard_word_analogy": 24.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, "eval_exact_match_for_task1386_anli_textual_entailment": 29.0, "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 65.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 38.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 39.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 37.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 60.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 12.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 31.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 50.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, "eval_exact_match_for_task202_multinli_textual_entailment": 29.0, "eval_exact_match_for_task219_rocstories_title_generation": 13.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_exact_match_for_task232_iirc_answerability_classification": 57.0, "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 47.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 12.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 45.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 54.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 28.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 53.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 63.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 47.0, "eval_exact_match_for_task743_eurlex_title_generation": 1.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 48.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 41.0, "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 40.875, "eval_exact_match_for_title_generation": 8.352, "eval_exact_match_for_word_analogy": 39.375, "eval_f1": 46.264, "eval_f1_for_answerability_classification": 51.9487, "eval_f1_for_cause_effect_classification": 55.787, "eval_f1_for_coreference_resolution": 45.69, "eval_f1_for_data_to_text": 53.5415, "eval_f1_for_dialogue_act_recognition": 51.0, "eval_f1_for_grammar_error_correction": 57.3058, "eval_f1_for_keyword_tagging": 48.4508, "eval_f1_for_overlap_extraction": 37.7383, "eval_f1_for_question_rewriting": 67.2374, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 41.5, "eval_f1_for_task034_winogrande_question_rewriting": 59.4823, "eval_f1_for_task035_winogrande_question_rewriting": 85.4531, "eval_f1_for_task036_qasc_keyword_tagging": 60.63, "eval_f1_for_task039_qasc_overlap_extraction": 32.3333, "eval_f1_for_task050_multirc_answerability_classification": 51.0, "eval_f1_for_task102_commongen_data_to_text": 55.8628, "eval_f1_for_task1152_bard_word_analogy": 34.0, "eval_f1_for_task1153_bard_word_analogy": 28.0, "eval_f1_for_task1154_bard_word_analogy": 27.0, "eval_f1_for_task1155_bard_word_analogy": 55.0, "eval_f1_for_task1156_bard_word_analogy": 47.0, "eval_f1_for_task1157_bard_word_analogy": 51.0, "eval_f1_for_task1158_bard_word_analogy": 49.0, "eval_f1_for_task1159_bard_word_analogy": 24.0, "eval_f1_for_task1161_coda_19_title_generation": 26.4094, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.5824, "eval_f1_for_task121_atomic_question_rewriting": 50.9477, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.3249, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.447, "eval_f1_for_task1356_xlsum_title_generation": 12.1518, "eval_f1_for_task1358_xlsum_title_generation": 31.0621, "eval_f1_for_task1385_anli_textual_entailment": 33.0, "eval_f1_for_task1386_anli_textual_entailment": 29.0, "eval_f1_for_task1387_anli_textual_entailment": 35.0, "eval_f1_for_task1388_cb_textual_entailment": 21.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, "eval_f1_for_task1407_dart_data_to_text": 37.3975, "eval_f1_for_task1409_dart_data_to_text": 53.0856, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.69, "eval_f1_for_task1439_doqa_answerability_classification": 38.0, "eval_f1_for_task1442_doqa_answerability_classification": 49.0, "eval_f1_for_task1516_imppres_textual_entailment": 39.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 28.4298, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.9216, "eval_f1_for_task1562_zest_question_rewriting": 55.9402, "eval_f1_for_task1586_scifact_title_generation": 26.8659, "eval_f1_for_task1598_nyc_data_to_text": 51.138, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 37.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2224, "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.2751, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 42.4348, "eval_f1_for_task1664_wino_bias_coreference_resolution": 72.3333, "eval_f1_for_task1728_web_nlg_data_to_text": 58.852, "eval_f1_for_task190_snli_textual_entailment": 50.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 34.0, "eval_f1_for_task201_multinli_textual_entailment": 33.0, "eval_f1_for_task202_multinli_textual_entailment": 29.0, "eval_f1_for_task219_rocstories_title_generation": 26.2667, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_f1_for_task232_iirc_answerability_classification": 57.0, "eval_f1_for_task233_iirc_answerability_classification": 48.0, "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.8333, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 43.1433, "eval_f1_for_task288_gigaword_title_generation": 29.3362, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 61.2167, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 53.7571, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 84.6667, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 32.4234, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.0, "eval_f1_for_task402_grailqa_question_rewriting": 82.5249, "eval_f1_for_task418_persent_title_generation": 24.5747, "eval_f1_for_task442_com_qa_question_rewriting": 71.5369, "eval_f1_for_task500_scruples_title_generation": 14.7261, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9704, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 42.3802, "eval_f1_for_task602_wikitext_title_generation": 13.1346, "eval_f1_for_task613_liar_keyword_tagging": 17.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 39.419, "eval_f1_for_task619_ohsumed_title_generation": 32.6017, "eval_f1_for_task620_ohsumed_keyword_tagging": 42.1, "eval_f1_for_task623_ohsumed_keyword_tagging": 53.0, "eval_f1_for_task640_e_snli_textual_entailment": 35.0, "eval_f1_for_task641_e_snli_textual_entailment": 34.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 68.8571, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 6.8857, "eval_f1_for_task670_ambigqa_question_rewriting": 71.9186, "eval_f1_for_task671_ambigqa_question_rewriting": 62.5564, "eval_f1_for_task677_ollie_data_to_text": 35.5112, "eval_f1_for_task738_perspectrum_textual_entailment": 47.0, "eval_f1_for_task743_eurlex_title_generation": 28.0812, "eval_f1_for_task760_msr_sqa_data_to_text": 2.7812, "eval_f1_for_task769_qed_title_generation": 58.6905, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 50.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, "eval_f1_for_task890_gwsd_textual_entailment": 41.0, "eval_f1_for_task891_gap_coreference_resolution": 53.1333, "eval_f1_for_task892_gap_coreference_resolution": 48.0, "eval_f1_for_task893_gap_coreference_resolution": 27.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_f1_for_task957_e2e_data_to_text": 54.4076, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 40.875, "eval_f1_for_title_generation": 30.1199, "eval_f1_for_word_analogy": 39.375, "eval_gen_len": 10.5342, "eval_global_step": 100, "eval_loss": 1.0650111436843872, "eval_rouge1": 48.1785, "eval_rouge1_for_answerability_classification": 51.9487, "eval_rouge1_for_cause_effect_classification": 56.6205, "eval_rouge1_for_coreference_resolution": 46.7158, "eval_rouge1_for_data_to_text": 56.3881, "eval_rouge1_for_dialogue_act_recognition": 54.0095, "eval_rouge1_for_grammar_error_correction": 62.367, "eval_rouge1_for_keyword_tagging": 54.0129, "eval_rouge1_for_overlap_extraction": 41.8084, "eval_rouge1_for_question_rewriting": 68.924, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.5, "eval_rouge1_for_task034_winogrande_question_rewriting": 59.6458, "eval_rouge1_for_task035_winogrande_question_rewriting": 86.4193, "eval_rouge1_for_task036_qasc_keyword_tagging": 67.2549, "eval_rouge1_for_task039_qasc_overlap_extraction": 39.6667, "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.4018, "eval_rouge1_for_task1152_bard_word_analogy": 34.0, "eval_rouge1_for_task1153_bard_word_analogy": 28.0, "eval_rouge1_for_task1154_bard_word_analogy": 27.0, "eval_rouge1_for_task1155_bard_word_analogy": 55.0, "eval_rouge1_for_task1156_bard_word_analogy": 47.0, "eval_rouge1_for_task1157_bard_word_analogy": 51.0, "eval_rouge1_for_task1158_bard_word_analogy": 49.0, "eval_rouge1_for_task1159_bard_word_analogy": 24.0, "eval_rouge1_for_task1161_coda_19_title_generation": 29.9273, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.9084, "eval_rouge1_for_task121_atomic_question_rewriting": 53.5591, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.5337, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 44.2199, "eval_rouge1_for_task1356_xlsum_title_generation": 14.6223, "eval_rouge1_for_task1358_xlsum_title_generation": 36.2903, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 29.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 21.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 69.4, "eval_rouge1_for_task1407_dart_data_to_text": 38.7151, "eval_rouge1_for_task1409_dart_data_to_text": 53.2778, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.579, "eval_rouge1_for_task1439_doqa_answerability_classification": 38.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 39.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 30.4356, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 88.155, "eval_rouge1_for_task1562_zest_question_rewriting": 58.9557, "eval_rouge1_for_task1586_scifact_title_generation": 29.5846, "eval_rouge1_for_task1598_nyc_data_to_text": 51.7184, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 79.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.527, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.3147, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 43.691, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 72.3333, "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.1735, "eval_rouge1_for_task190_snli_textual_entailment": 50.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, "eval_rouge1_for_task202_multinli_textual_entailment": 29.0, "eval_rouge1_for_task219_rocstories_title_generation": 31.981, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rouge1_for_task232_iirc_answerability_classification": 57.0, "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.0, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 43.9502, "eval_rouge1_for_task288_gigaword_title_generation": 32.1016, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 61.5167, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 53.6857, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.6667, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.1406, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.0, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.4909, "eval_rouge1_for_task418_persent_title_generation": 27.4867, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.9328, "eval_rouge1_for_task500_scruples_title_generation": 15.8713, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.7594, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.4689, "eval_rouge1_for_task602_wikitext_title_generation": 13.7031, "eval_rouge1_for_task613_liar_keyword_tagging": 32.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.5366, "eval_rouge1_for_task619_ohsumed_title_generation": 35.4842, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.6667, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 53.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 69.6429, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 6.8524, "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.7755, "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.73, "eval_rouge1_for_task677_ollie_data_to_text": 38.2961, "eval_rouge1_for_task738_perspectrum_textual_entailment": 67.0, "eval_rouge1_for_task743_eurlex_title_generation": 29.4203, "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.9757, "eval_rouge1_for_task769_qed_title_generation": 58.7786, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 41.0, "eval_rouge1_for_task891_gap_coreference_resolution": 53.1333, "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rouge1_for_task957_e2e_data_to_text": 57.0944, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 43.4583, "eval_rouge1_for_title_generation": 32.1719, "eval_rouge1_for_word_analogy": 39.375, "eval_rougeL": 46.7901, "eval_rougeL_for_answerability_classification": 51.9487, "eval_rougeL_for_cause_effect_classification": 55.4852, "eval_rougeL_for_coreference_resolution": 46.709, "eval_rougeL_for_data_to_text": 48.3958, "eval_rougeL_for_dialogue_act_recognition": 54.0095, "eval_rougeL_for_grammar_error_correction": 61.5912, "eval_rougeL_for_keyword_tagging": 53.2007, "eval_rougeL_for_overlap_extraction": 41.3452, "eval_rougeL_for_question_rewriting": 65.3221, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.5, "eval_rougeL_for_task034_winogrande_question_rewriting": 59.0875, "eval_rougeL_for_task035_winogrande_question_rewriting": 84.9618, "eval_rougeL_for_task036_qasc_keyword_tagging": 64.5941, "eval_rougeL_for_task039_qasc_overlap_extraction": 39.6667, "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, "eval_rougeL_for_task102_commongen_data_to_text": 58.9554, "eval_rougeL_for_task1152_bard_word_analogy": 34.0, "eval_rougeL_for_task1153_bard_word_analogy": 28.0, "eval_rougeL_for_task1154_bard_word_analogy": 27.0, "eval_rougeL_for_task1155_bard_word_analogy": 55.0, "eval_rougeL_for_task1156_bard_word_analogy": 47.0, "eval_rougeL_for_task1157_bard_word_analogy": 51.0, "eval_rougeL_for_task1158_bard_word_analogy": 49.0, "eval_rougeL_for_task1159_bard_word_analogy": 24.0, "eval_rougeL_for_task1161_coda_19_title_generation": 24.1191, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.4622, "eval_rougeL_for_task121_atomic_question_rewriting": 49.1799, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.9246, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 41.2633, "eval_rougeL_for_task1356_xlsum_title_generation": 12.1365, "eval_rougeL_for_task1358_xlsum_title_generation": 29.5516, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 29.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 21.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 69.4, "eval_rougeL_for_task1407_dart_data_to_text": 32.092, "eval_rougeL_for_task1409_dart_data_to_text": 43.9521, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9512, "eval_rougeL_for_task1439_doqa_answerability_classification": 38.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 39.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 28.759, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.2312, "eval_rougeL_for_task1562_zest_question_rewriting": 52.0447, "eval_rougeL_for_task1586_scifact_title_generation": 23.1968, "eval_rougeL_for_task1598_nyc_data_to_text": 41.2979, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 79.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.2302, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0594, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 39.5126, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 72.3333, "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.4178, "eval_rougeL_for_task190_snli_textual_entailment": 50.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, "eval_rougeL_for_task202_multinli_textual_entailment": 29.0, "eval_rougeL_for_task219_rocstories_title_generation": 31.981, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rougeL_for_task232_iirc_answerability_classification": 57.0, "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.0, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 43.0238, "eval_rougeL_for_task288_gigaword_title_generation": 28.0769, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 61.5167, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 53.6857, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.6667, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.7927, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.0, "eval_rougeL_for_task402_grailqa_question_rewriting": 71.2875, "eval_rougeL_for_task418_persent_title_generation": 23.8913, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.6656, "eval_rougeL_for_task500_scruples_title_generation": 14.9463, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.2299, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.68, "eval_rougeL_for_task602_wikitext_title_generation": 13.6515, "eval_rougeL_for_task613_liar_keyword_tagging": 32.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.9372, "eval_rougeL_for_task619_ohsumed_title_generation": 32.1578, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.2667, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 53.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 69.6429, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 6.7571, "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.8485, "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.5123, "eval_rougeL_for_task677_ollie_data_to_text": 30.9508, "eval_rougeL_for_task738_perspectrum_textual_entailment": 67.0, "eval_rougeL_for_task743_eurlex_title_generation": 26.6538, "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.882, "eval_rougeL_for_task769_qed_title_generation": 58.5286, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 55.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 41.0, "eval_rougeL_for_task891_gap_coreference_resolution": 53.1333, "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.2748, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 43.4583, "eval_rougeL_for_title_generation": 29.642, "eval_rougeL_for_word_analogy": 39.375, "eval_runtime": 1085.5988, "eval_samples_per_second": 10.971, "eval_steps_per_second": 0.686, "step": 100 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 1.2871, "step": 200 }, { "epoch": 0.04, "eval_exact_match": 31.0747, "eval_exact_match_for_answerability_classification": 51.6154, "eval_exact_match_for_cause_effect_classification": 33.5714, "eval_exact_match_for_coreference_resolution": 35.6429, "eval_exact_match_for_data_to_text": 8.7167, "eval_exact_match_for_dialogue_act_recognition": 49.4286, "eval_exact_match_for_grammar_error_correction": 8.5, "eval_exact_match_for_keyword_tagging": 44.2, "eval_exact_match_for_overlap_extraction": 10.5, "eval_exact_match_for_question_rewriting": 3.6364, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 46.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 35.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 35.0, "eval_exact_match_for_task1153_bard_word_analogy": 32.0, "eval_exact_match_for_task1154_bard_word_analogy": 31.0, "eval_exact_match_for_task1155_bard_word_analogy": 58.0, "eval_exact_match_for_task1156_bard_word_analogy": 49.0, "eval_exact_match_for_task1157_bard_word_analogy": 62.0, "eval_exact_match_for_task1158_bard_word_analogy": 59.0, "eval_exact_match_for_task1159_bard_word_analogy": 41.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 17.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 74.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 56.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 45.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 45.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 62.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 48.0, "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 68.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_exact_match_for_task1659_billsum_title_generation": 23.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, "eval_exact_match_for_task190_snli_textual_entailment": 36.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 46.0, "eval_exact_match_for_task201_multinli_textual_entailment": 28.0, "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, "eval_exact_match_for_task219_rocstories_title_generation": 11.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, "eval_exact_match_for_task232_iirc_answerability_classification": 53.0, "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 55.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 46.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 40.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 28.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 60.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 23.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 72.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 35.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 35.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 73.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 58.0, "eval_exact_match_for_task743_eurlex_title_generation": 0.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 59.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 47.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, "eval_exact_match_for_task891_gap_coreference_resolution": 55.0, "eval_exact_match_for_task892_gap_coreference_resolution": 26.0, "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 63.0, "eval_exact_match_for_textual_entailment": 43.5833, "eval_exact_match_for_title_generation": 9.3049, "eval_exact_match_for_word_analogy": 45.875, "eval_f1": 47.7538, "eval_f1_for_answerability_classification": 54.1795, "eval_f1_for_cause_effect_classification": 54.3582, "eval_f1_for_coreference_resolution": 42.5552, "eval_f1_for_data_to_text": 53.9774, "eval_f1_for_dialogue_act_recognition": 53.0, "eval_f1_for_grammar_error_correction": 57.1275, "eval_f1_for_keyword_tagging": 57.9306, "eval_f1_for_overlap_extraction": 30.5173, "eval_f1_for_question_rewriting": 65.7297, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 47.1667, "eval_f1_for_task034_winogrande_question_rewriting": 48.7572, "eval_f1_for_task035_winogrande_question_rewriting": 82.6949, "eval_f1_for_task036_qasc_keyword_tagging": 69.0387, "eval_f1_for_task039_qasc_overlap_extraction": 30.8333, "eval_f1_for_task050_multirc_answerability_classification": 51.0, "eval_f1_for_task102_commongen_data_to_text": 58.2243, "eval_f1_for_task1152_bard_word_analogy": 35.0, "eval_f1_for_task1153_bard_word_analogy": 32.0, "eval_f1_for_task1154_bard_word_analogy": 31.0, "eval_f1_for_task1155_bard_word_analogy": 58.0, "eval_f1_for_task1156_bard_word_analogy": 49.0, "eval_f1_for_task1157_bard_word_analogy": 62.0, "eval_f1_for_task1158_bard_word_analogy": 59.0, "eval_f1_for_task1159_bard_word_analogy": 41.0, "eval_f1_for_task1161_coda_19_title_generation": 27.5783, "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.4086, "eval_f1_for_task121_atomic_question_rewriting": 51.4216, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.8719, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.8153, "eval_f1_for_task1356_xlsum_title_generation": 16.2304, "eval_f1_for_task1358_xlsum_title_generation": 31.6202, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 34.0, "eval_f1_for_task1387_anli_textual_entailment": 35.0, "eval_f1_for_task1388_cb_textual_entailment": 20.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 74.0, "eval_f1_for_task1407_dart_data_to_text": 41.0553, "eval_f1_for_task1409_dart_data_to_text": 49.5384, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1403, "eval_f1_for_task1439_doqa_answerability_classification": 49.0, "eval_f1_for_task1442_doqa_answerability_classification": 56.0, "eval_f1_for_task1516_imppres_textual_entailment": 45.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 45.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 62.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1540_peer_read_title_generation": 32.9225, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1146, "eval_f1_for_task1562_zest_question_rewriting": 57.8053, "eval_f1_for_task1586_scifact_title_generation": 29.4849, "eval_f1_for_task1598_nyc_data_to_text": 50.5019, "eval_f1_for_task1612_sick_textual_entailment": 48.0, "eval_f1_for_task1615_sick_textual_entailment": 45.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.9381, "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_f1_for_task1631_open_pi_data_to_text": 96.2879, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_f1_for_task1659_billsum_title_generation": 43.1749, "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.4381, "eval_f1_for_task1728_web_nlg_data_to_text": 56.7162, "eval_f1_for_task190_snli_textual_entailment": 36.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 46.0, "eval_f1_for_task201_multinli_textual_entailment": 28.0, "eval_f1_for_task202_multinli_textual_entailment": 31.0, "eval_f1_for_task219_rocstories_title_generation": 28.9738, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, "eval_f1_for_task232_iirc_answerability_classification": 53.0, "eval_f1_for_task233_iirc_answerability_classification": 45.0, "eval_f1_for_task242_tweetqa_answerability_classification": 55.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.4381, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 30.2012, "eval_f1_for_task288_gigaword_title_generation": 28.2174, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 7.6667, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 70.6968, "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 82.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 80.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 31.0516, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.8333, "eval_f1_for_task402_grailqa_question_rewriting": 80.2901, "eval_f1_for_task418_persent_title_generation": 24.9272, "eval_f1_for_task442_com_qa_question_rewriting": 69.8931, "eval_f1_for_task500_scruples_title_generation": 20.562, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.2914, "eval_f1_for_task520_aquamuse_answerability_classification": 60.0, "eval_f1_for_task569_recipe_nlg_title_generation": 44.2689, "eval_f1_for_task602_wikitext_title_generation": 12.519, "eval_f1_for_task613_liar_keyword_tagging": 21.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 38.4556, "eval_f1_for_task619_ohsumed_title_generation": 40.3392, "eval_f1_for_task620_ohsumed_keyword_tagging": 46.8667, "eval_f1_for_task623_ohsumed_keyword_tagging": 72.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 35.0, "eval_f1_for_task642_e_snli_textual_entailment": 35.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 80.4143, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 1.0, "eval_f1_for_task670_ambigqa_question_rewriting": 73.3789, "eval_f1_for_task671_ambigqa_question_rewriting": 58.6241, "eval_f1_for_task677_ollie_data_to_text": 35.1377, "eval_f1_for_task738_perspectrum_textual_entailment": 58.0, "eval_f1_for_task743_eurlex_title_generation": 31.6015, "eval_f1_for_task760_msr_sqa_data_to_text": 4.8111, "eval_f1_for_task769_qed_title_generation": 71.4548, "eval_f1_for_task827_copa_cause_effect_classification": 47.0, "eval_f1_for_task828_copa_cause_effect_classification": 52.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, "eval_f1_for_task890_gwsd_textual_entailment": 44.0, "eval_f1_for_task891_gap_coreference_resolution": 67.5333, "eval_f1_for_task892_gap_coreference_resolution": 26.0, "eval_f1_for_task893_gap_coreference_resolution": 27.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, "eval_f1_for_task957_e2e_data_to_text": 57.1409, "eval_f1_for_task970_sherliic_textual_entailment": 63.0, "eval_f1_for_textual_entailment": 43.5833, "eval_f1_for_title_generation": 33.0177, "eval_f1_for_word_analogy": 45.875, "eval_gen_len": 9.6336, "eval_global_step": 200, "eval_loss": 1.0445849895477295, "eval_rouge1": 49.4067, "eval_rouge1_for_answerability_classification": 54.1795, "eval_rouge1_for_cause_effect_classification": 54.9841, "eval_rouge1_for_coreference_resolution": 43.0066, "eval_rouge1_for_data_to_text": 56.7723, "eval_rouge1_for_dialogue_act_recognition": 56.1524, "eval_rouge1_for_grammar_error_correction": 62.1114, "eval_rouge1_for_keyword_tagging": 62.3246, "eval_rouge1_for_overlap_extraction": 34.9326, "eval_rouge1_for_question_rewriting": 67.4374, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.0667, "eval_rouge1_for_task034_winogrande_question_rewriting": 48.7774, "eval_rouge1_for_task035_winogrande_question_rewriting": 83.4885, "eval_rouge1_for_task036_qasc_keyword_tagging": 74.4418, "eval_rouge1_for_task039_qasc_overlap_extraction": 38.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, "eval_rouge1_for_task102_commongen_data_to_text": 71.3349, "eval_rouge1_for_task1152_bard_word_analogy": 35.0, "eval_rouge1_for_task1153_bard_word_analogy": 32.0, "eval_rouge1_for_task1154_bard_word_analogy": 31.0, "eval_rouge1_for_task1155_bard_word_analogy": 58.0, "eval_rouge1_for_task1156_bard_word_analogy": 49.0, "eval_rouge1_for_task1157_bard_word_analogy": 62.0, "eval_rouge1_for_task1158_bard_word_analogy": 59.0, "eval_rouge1_for_task1159_bard_word_analogy": 41.0, "eval_rouge1_for_task1161_coda_19_title_generation": 31.2109, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.0305, "eval_rouge1_for_task121_atomic_question_rewriting": 53.7479, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2362, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.3319, "eval_rouge1_for_task1356_xlsum_title_generation": 19.1863, "eval_rouge1_for_task1358_xlsum_title_generation": 36.3691, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 76.4, "eval_rouge1_for_task1407_dart_data_to_text": 42.6521, "eval_rouge1_for_task1409_dart_data_to_text": 50.1649, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9933, "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 56.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 45.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 45.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 62.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1540_peer_read_title_generation": 35.2766, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.2295, "eval_rouge1_for_task1562_zest_question_rewriting": 60.4247, "eval_rouge1_for_task1586_scifact_title_generation": 33.4504, "eval_rouge1_for_task1598_nyc_data_to_text": 50.9886, "eval_rouge1_for_task1612_sick_textual_entailment": 48.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.2037, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 96.2957, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_rouge1_for_task1659_billsum_title_generation": 44.5149, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.4381, "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.6511, "eval_rouge1_for_task190_snli_textual_entailment": 36.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 46.0, "eval_rouge1_for_task201_multinli_textual_entailment": 28.0, "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, "eval_rouge1_for_task219_rocstories_title_generation": 34.2595, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, "eval_rouge1_for_task232_iirc_answerability_classification": 53.0, "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 55.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.7833, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 31.5318, "eval_rouge1_for_task288_gigaword_title_generation": 31.4023, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.3333, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 70.6524, "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 82.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 80.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.3455, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.5, "eval_rouge1_for_task402_grailqa_question_rewriting": 82.5444, "eval_rouge1_for_task418_persent_title_generation": 28.6886, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.4865, "eval_rouge1_for_task500_scruples_title_generation": 21.7752, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.0856, "eval_rouge1_for_task520_aquamuse_answerability_classification": 60.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 45.0842, "eval_rouge1_for_task602_wikitext_title_generation": 13.0362, "eval_rouge1_for_task613_liar_keyword_tagging": 34.7333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 42.5436, "eval_rouge1_for_task619_ohsumed_title_generation": 43.8635, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.5333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 72.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 35.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 35.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 80.9143, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 1.0, "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.1431, "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.6329, "eval_rouge1_for_task677_ollie_data_to_text": 38.0614, "eval_rouge1_for_task738_perspectrum_textual_entailment": 63.0, "eval_rouge1_for_task743_eurlex_title_generation": 33.0992, "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.9396, "eval_rouge1_for_task769_qed_title_generation": 71.3734, "eval_rouge1_for_task827_copa_cause_effect_classification": 47.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, "eval_rouge1_for_task891_gap_coreference_resolution": 67.319, "eval_rouge1_for_task892_gap_coreference_resolution": 26.0, "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, "eval_rouge1_for_task957_e2e_data_to_text": 59.5057, "eval_rouge1_for_task970_sherliic_textual_entailment": 63.0, "eval_rouge1_for_textual_entailment": 45.3194, "eval_rouge1_for_title_generation": 35.2481, "eval_rouge1_for_word_analogy": 45.875, "eval_rougeL": 47.9651, "eval_rougeL_for_answerability_classification": 54.1795, "eval_rougeL_for_cause_effect_classification": 54.0839, "eval_rougeL_for_coreference_resolution": 43.0066, "eval_rougeL_for_data_to_text": 48.7214, "eval_rougeL_for_dialogue_act_recognition": 56.1524, "eval_rougeL_for_grammar_error_correction": 61.4562, "eval_rougeL_for_keyword_tagging": 61.5861, "eval_rougeL_for_overlap_extraction": 34.3413, "eval_rougeL_for_question_rewriting": 63.5791, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.0667, "eval_rougeL_for_task034_winogrande_question_rewriting": 47.9016, "eval_rougeL_for_task035_winogrande_question_rewriting": 81.856, "eval_rougeL_for_task036_qasc_keyword_tagging": 72.5831, "eval_rougeL_for_task039_qasc_overlap_extraction": 38.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, "eval_rougeL_for_task102_commongen_data_to_text": 61.9261, "eval_rougeL_for_task1152_bard_word_analogy": 35.0, "eval_rougeL_for_task1153_bard_word_analogy": 32.0, "eval_rougeL_for_task1154_bard_word_analogy": 31.0, "eval_rougeL_for_task1155_bard_word_analogy": 58.0, "eval_rougeL_for_task1156_bard_word_analogy": 49.0, "eval_rougeL_for_task1157_bard_word_analogy": 62.0, "eval_rougeL_for_task1158_bard_word_analogy": 59.0, "eval_rougeL_for_task1159_bard_word_analogy": 41.0, "eval_rougeL_for_task1161_coda_19_title_generation": 25.9483, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.2666, "eval_rougeL_for_task121_atomic_question_rewriting": 49.9815, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7485, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.2958, "eval_rougeL_for_task1356_xlsum_title_generation": 16.5982, "eval_rougeL_for_task1358_xlsum_title_generation": 29.4902, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 76.4, "eval_rougeL_for_task1407_dart_data_to_text": 35.4253, "eval_rougeL_for_task1409_dart_data_to_text": 41.1406, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.6824, "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 56.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 45.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 45.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 62.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1540_peer_read_title_generation": 32.2671, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.23, "eval_rougeL_for_task1562_zest_question_rewriting": 54.2752, "eval_rougeL_for_task1586_scifact_title_generation": 27.4227, "eval_rougeL_for_task1598_nyc_data_to_text": 40.3089, "eval_rougeL_for_task1612_sick_textual_entailment": 48.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.9714, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 96.023, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_rougeL_for_task1659_billsum_title_generation": 41.189, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.4381, "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.9803, "eval_rougeL_for_task190_snli_textual_entailment": 36.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 46.0, "eval_rougeL_for_task201_multinli_textual_entailment": 28.0, "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, "eval_rougeL_for_task219_rocstories_title_generation": 33.5262, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, "eval_rougeL_for_task232_iirc_answerability_classification": 53.0, "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 55.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.7833, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 30.3492, "eval_rougeL_for_task288_gigaword_title_generation": 27.464, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.3333, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 70.6524, "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 82.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 80.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.6545, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.5, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.7294, "eval_rougeL_for_task418_persent_title_generation": 24.9426, "eval_rougeL_for_task442_com_qa_question_rewriting": 69.0747, "eval_rougeL_for_task500_scruples_title_generation": 20.1268, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.3742, "eval_rougeL_for_task520_aquamuse_answerability_classification": 60.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.8921, "eval_rougeL_for_task602_wikitext_title_generation": 13.0362, "eval_rougeL_for_task613_liar_keyword_tagging": 34.7333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 36.9326, "eval_rougeL_for_task619_ohsumed_title_generation": 38.2482, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.7, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 72.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 35.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 35.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 80.9143, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 1.0, "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.2975, "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.7207, "eval_rougeL_for_task677_ollie_data_to_text": 30.4731, "eval_rougeL_for_task738_perspectrum_textual_entailment": 63.0, "eval_rougeL_for_task743_eurlex_title_generation": 29.2524, "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.428, "eval_rougeL_for_task769_qed_title_generation": 71.1234, "eval_rougeL_for_task827_copa_cause_effect_classification": 47.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, "eval_rougeL_for_task891_gap_coreference_resolution": 67.319, "eval_rougeL_for_task892_gap_coreference_resolution": 26.0, "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, "eval_rougeL_for_task957_e2e_data_to_text": 46.0099, "eval_rougeL_for_task970_sherliic_textual_entailment": 63.0, "eval_rougeL_for_textual_entailment": 45.3194, "eval_rougeL_for_title_generation": 32.4307, "eval_rougeL_for_word_analogy": 45.875, "eval_runtime": 1016.7205, "eval_samples_per_second": 11.714, "eval_steps_per_second": 0.733, "step": 200 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 1.179, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 30.3442, "eval_exact_match_for_answerability_classification": 51.0769, "eval_exact_match_for_cause_effect_classification": 38.2857, "eval_exact_match_for_coreference_resolution": 38.1429, "eval_exact_match_for_data_to_text": 8.3535, "eval_exact_match_for_dialogue_act_recognition": 47.8571, "eval_exact_match_for_grammar_error_correction": 7.5, "eval_exact_match_for_keyword_tagging": 40.4, "eval_exact_match_for_overlap_extraction": 8.5, "eval_exact_match_for_question_rewriting": 1.8182, "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 48.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 10.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 17.0, "eval_exact_match_for_task050_multirc_answerability_classification": 60.0, "eval_exact_match_for_task102_commongen_data_to_text": 1.0, "eval_exact_match_for_task1152_bard_word_analogy": 37.0, "eval_exact_match_for_task1153_bard_word_analogy": 28.0, "eval_exact_match_for_task1154_bard_word_analogy": 31.0, "eval_exact_match_for_task1155_bard_word_analogy": 67.0, "eval_exact_match_for_task1156_bard_word_analogy": 53.0, "eval_exact_match_for_task1157_bard_word_analogy": 54.0, "eval_exact_match_for_task1158_bard_word_analogy": 52.0, "eval_exact_match_for_task1159_bard_word_analogy": 40.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 30.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 42.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 65.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 16.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 28.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 3.0, "eval_exact_match_for_task190_snli_textual_entailment": 50.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, "eval_exact_match_for_task202_multinli_textual_entailment": 22.0, "eval_exact_match_for_task219_rocstories_title_generation": 7.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_exact_match_for_task232_iirc_answerability_classification": 51.0, "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 59.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 51.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 56.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 55.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 64.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 9.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 3.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 72.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 14.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 30.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 70.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 43.0, "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, "eval_exact_match_for_task892_gap_coreference_resolution": 45.0, "eval_exact_match_for_task893_gap_coreference_resolution": 25.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 39.8333, "eval_exact_match_for_title_generation": 9.6413, "eval_exact_match_for_word_analogy": 45.25, "eval_f1": 47.4551, "eval_f1_for_answerability_classification": 53.641, "eval_f1_for_cause_effect_classification": 56.7836, "eval_f1_for_coreference_resolution": 44.3909, "eval_f1_for_data_to_text": 54.7255, "eval_f1_for_dialogue_act_recognition": 51.0714, "eval_f1_for_grammar_error_correction": 57.2143, "eval_f1_for_keyword_tagging": 57.1003, "eval_f1_for_overlap_extraction": 39.1652, "eval_f1_for_question_rewriting": 62.1332, "eval_f1_for_task020_mctaco_answerability_classification": 48.0, "eval_f1_for_task033_winogrande_coreference_resolution": 50.5, "eval_f1_for_task034_winogrande_question_rewriting": 13.4998, "eval_f1_for_task035_winogrande_question_rewriting": 79.4112, "eval_f1_for_task036_qasc_keyword_tagging": 59.0541, "eval_f1_for_task039_qasc_overlap_extraction": 25.4, "eval_f1_for_task050_multirc_answerability_classification": 60.0, "eval_f1_for_task102_commongen_data_to_text": 58.2729, "eval_f1_for_task1152_bard_word_analogy": 37.0, "eval_f1_for_task1153_bard_word_analogy": 28.6667, "eval_f1_for_task1154_bard_word_analogy": 31.0, "eval_f1_for_task1155_bard_word_analogy": 67.0, "eval_f1_for_task1156_bard_word_analogy": 55.0, "eval_f1_for_task1157_bard_word_analogy": 54.0, "eval_f1_for_task1158_bard_word_analogy": 52.0, "eval_f1_for_task1159_bard_word_analogy": 40.0, "eval_f1_for_task1161_coda_19_title_generation": 36.9434, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0413, "eval_f1_for_task121_atomic_question_rewriting": 44.3964, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 9.3839, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 41.4801, "eval_f1_for_task1356_xlsum_title_generation": 22.7791, "eval_f1_for_task1358_xlsum_title_generation": 34.9325, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 34.0, "eval_f1_for_task1387_anli_textual_entailment": 33.0, "eval_f1_for_task1388_cb_textual_entailment": 20.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_f1_for_task1407_dart_data_to_text": 40.2858, "eval_f1_for_task1409_dart_data_to_text": 51.7213, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.2321, "eval_f1_for_task1439_doqa_answerability_classification": 46.0, "eval_f1_for_task1442_doqa_answerability_classification": 52.0, "eval_f1_for_task1516_imppres_textual_entailment": 34.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 30.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 39.0411, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1965, "eval_f1_for_task1562_zest_question_rewriting": 54.0455, "eval_f1_for_task1586_scifact_title_generation": 36.2934, "eval_f1_for_task1598_nyc_data_to_text": 52.814, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 42.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.497, "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_f1_for_task1631_open_pi_data_to_text": 96.645, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 45.158, "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.2857, "eval_f1_for_task1728_web_nlg_data_to_text": 60.426, "eval_f1_for_task190_snli_textual_entailment": 50.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 34.0, "eval_f1_for_task201_multinli_textual_entailment": 32.0, "eval_f1_for_task202_multinli_textual_entailment": 22.0, "eval_f1_for_task219_rocstories_title_generation": 26.619, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_f1_for_task232_iirc_answerability_classification": 51.0, "eval_f1_for_task233_iirc_answerability_classification": 50.0, "eval_f1_for_task242_tweetqa_answerability_classification": 59.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.2048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 52.9304, "eval_f1_for_task288_gigaword_title_generation": 31.6203, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 6.8667, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 63.1381, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 77.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 88.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_f1_for_task393_cod3s_cause_effect_classification": 28.5404, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.5, "eval_f1_for_task402_grailqa_question_rewriting": 81.4543, "eval_f1_for_task418_persent_title_generation": 29.0479, "eval_f1_for_task442_com_qa_question_rewriting": 70.0415, "eval_f1_for_task500_scruples_title_generation": 22.3776, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.5711, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 42.3447, "eval_f1_for_task602_wikitext_title_generation": 15.8446, "eval_f1_for_task613_liar_keyword_tagging": 22.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 46.278, "eval_f1_for_task619_ohsumed_title_generation": 45.7576, "eval_f1_for_task620_ohsumed_keyword_tagging": 38.6333, "eval_f1_for_task623_ohsumed_keyword_tagging": 72.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 34.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.481, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 19.5, "eval_f1_for_task670_ambigqa_question_rewriting": 75.6794, "eval_f1_for_task671_ambigqa_question_rewriting": 63.9186, "eval_f1_for_task677_ollie_data_to_text": 35.6377, "eval_f1_for_task738_perspectrum_textual_entailment": 30.0, "eval_f1_for_task743_eurlex_title_generation": 32.8313, "eval_f1_for_task760_msr_sqa_data_to_text": 3.0232, "eval_f1_for_task769_qed_title_generation": 81.6804, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 50.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_f1_for_task890_gwsd_textual_entailment": 43.0, "eval_f1_for_task891_gap_coreference_resolution": 68.4778, "eval_f1_for_task892_gap_coreference_resolution": 45.0, "eval_f1_for_task893_gap_coreference_resolution": 25.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 55.4442, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 39.8333, "eval_f1_for_title_generation": 36.0813, "eval_f1_for_word_analogy": 45.5833, "eval_gen_len": 9.1353, "eval_global_step": 500, "eval_loss": 1.038378119468689, "eval_rouge1": 49.371, "eval_rouge1_for_answerability_classification": 53.641, "eval_rouge1_for_cause_effect_classification": 57.5249, "eval_rouge1_for_coreference_resolution": 44.9563, "eval_rouge1_for_data_to_text": 57.4238, "eval_rouge1_for_dialogue_act_recognition": 53.4524, "eval_rouge1_for_grammar_error_correction": 62.1418, "eval_rouge1_for_keyword_tagging": 61.7486, "eval_rouge1_for_overlap_extraction": 41.46, "eval_rouge1_for_question_rewriting": 63.7433, "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 50.4, "eval_rouge1_for_task034_winogrande_question_rewriting": 13.4602, "eval_rouge1_for_task035_winogrande_question_rewriting": 80.3286, "eval_rouge1_for_task036_qasc_keyword_tagging": 63.5097, "eval_rouge1_for_task039_qasc_overlap_extraction": 28.7333, "eval_rouge1_for_task050_multirc_answerability_classification": 60.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.9542, "eval_rouge1_for_task1152_bard_word_analogy": 37.0, "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, "eval_rouge1_for_task1154_bard_word_analogy": 31.0, "eval_rouge1_for_task1155_bard_word_analogy": 67.0, "eval_rouge1_for_task1156_bard_word_analogy": 55.0, "eval_rouge1_for_task1157_bard_word_analogy": 54.0, "eval_rouge1_for_task1158_bard_word_analogy": 52.0, "eval_rouge1_for_task1159_bard_word_analogy": 40.0, "eval_rouge1_for_task1161_coda_19_title_generation": 40.9729, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2705, "eval_rouge1_for_task121_atomic_question_rewriting": 47.0305, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.3516, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 44.9978, "eval_rouge1_for_task1356_xlsum_title_generation": 26.3109, "eval_rouge1_for_task1358_xlsum_title_generation": 39.2928, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 69.0, "eval_rouge1_for_task1407_dart_data_to_text": 42.2352, "eval_rouge1_for_task1409_dart_data_to_text": 52.2692, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.0751, "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 41.5767, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.2085, "eval_rouge1_for_task1562_zest_question_rewriting": 56.4154, "eval_rouge1_for_task1586_scifact_title_generation": 40.3838, "eval_rouge1_for_task1598_nyc_data_to_text": 53.6169, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 80.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.8029, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 96.7753, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 46.4617, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.2381, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.2679, "eval_rouge1_for_task190_snli_textual_entailment": 50.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, "eval_rouge1_for_task202_multinli_textual_entailment": 22.0, "eval_rouge1_for_task219_rocstories_title_generation": 29.169, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rouge1_for_task232_iirc_answerability_classification": 51.0, "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 59.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.55, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 54.1867, "eval_rouge1_for_task288_gigaword_title_generation": 34.6308, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 6.8667, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 63.0667, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 77.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 88.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.9178, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 83.9972, "eval_rouge1_for_task418_persent_title_generation": 32.45, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.6494, "eval_rouge1_for_task500_scruples_title_generation": 25.3633, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.7605, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.4414, "eval_rouge1_for_task602_wikitext_title_generation": 16.7891, "eval_rouge1_for_task613_liar_keyword_tagging": 36.1667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 51.0901, "eval_rouge1_for_task619_ohsumed_title_generation": 49.2498, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.0857, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 72.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.981, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.5, "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.6346, "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.5893, "eval_rouge1_for_task677_ollie_data_to_text": 38.234, "eval_rouge1_for_task738_perspectrum_textual_entailment": 72.0, "eval_rouge1_for_task743_eurlex_title_generation": 34.058, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2124, "eval_rouge1_for_task769_qed_title_generation": 82.1657, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 43.0, "eval_rouge1_for_task891_gap_coreference_resolution": 68.6, "eval_rouge1_for_task892_gap_coreference_resolution": 45.0, "eval_rouge1_for_task893_gap_coreference_resolution": 25.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.1325, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 43.1944, "eval_rouge1_for_title_generation": 38.3263, "eval_rouge1_for_word_analogy": 45.5833, "eval_rougeL": 47.7862, "eval_rougeL_for_answerability_classification": 53.641, "eval_rougeL_for_cause_effect_classification": 56.7158, "eval_rougeL_for_coreference_resolution": 44.9563, "eval_rougeL_for_data_to_text": 49.0535, "eval_rougeL_for_dialogue_act_recognition": 53.4524, "eval_rougeL_for_grammar_error_correction": 61.5642, "eval_rougeL_for_keyword_tagging": 61.1868, "eval_rougeL_for_overlap_extraction": 40.4263, "eval_rougeL_for_question_rewriting": 59.4076, "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 50.4, "eval_rougeL_for_task034_winogrande_question_rewriting": 13.4602, "eval_rougeL_for_task035_winogrande_question_rewriting": 78.4637, "eval_rougeL_for_task036_qasc_keyword_tagging": 62.1009, "eval_rougeL_for_task039_qasc_overlap_extraction": 28.7333, "eval_rougeL_for_task050_multirc_answerability_classification": 60.0, "eval_rougeL_for_task102_commongen_data_to_text": 60.4933, "eval_rougeL_for_task1152_bard_word_analogy": 37.0, "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, "eval_rougeL_for_task1154_bard_word_analogy": 31.0, "eval_rougeL_for_task1155_bard_word_analogy": 67.0, "eval_rougeL_for_task1156_bard_word_analogy": 55.0, "eval_rougeL_for_task1157_bard_word_analogy": 54.0, "eval_rougeL_for_task1158_bard_word_analogy": 52.0, "eval_rougeL_for_task1159_bard_word_analogy": 40.0, "eval_rougeL_for_task1161_coda_19_title_generation": 33.493, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0372, "eval_rougeL_for_task121_atomic_question_rewriting": 40.2486, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.1266, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 41.7045, "eval_rougeL_for_task1356_xlsum_title_generation": 21.7561, "eval_rougeL_for_task1358_xlsum_title_generation": 32.2062, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 69.0, "eval_rougeL_for_task1407_dart_data_to_text": 34.7769, "eval_rougeL_for_task1409_dart_data_to_text": 42.4152, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.7847, "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 36.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.7164, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.3436, "eval_rougeL_for_task1562_zest_question_rewriting": 48.1137, "eval_rougeL_for_task1586_scifact_title_generation": 32.6487, "eval_rougeL_for_task1598_nyc_data_to_text": 41.8511, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 80.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.5061, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 96.0267, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 41.6793, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.2381, "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.2926, "eval_rougeL_for_task190_snli_textual_entailment": 50.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, "eval_rougeL_for_task202_multinli_textual_entailment": 22.0, "eval_rougeL_for_task219_rocstories_title_generation": 29.169, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rougeL_for_task232_iirc_answerability_classification": 51.0, "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 59.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.55, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 52.1192, "eval_rougeL_for_task288_gigaword_title_generation": 30.5082, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 6.8667, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 63.0667, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 77.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 88.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.144, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.2948, "eval_rougeL_for_task418_persent_title_generation": 29.1499, "eval_rougeL_for_task442_com_qa_question_rewriting": 68.936, "eval_rougeL_for_task500_scruples_title_generation": 23.1932, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.3623, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.4833, "eval_rougeL_for_task602_wikitext_title_generation": 16.7891, "eval_rougeL_for_task613_liar_keyword_tagging": 36.1667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 46.1999, "eval_rougeL_for_task619_ohsumed_title_generation": 41.2945, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.6857, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 72.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.981, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.5, "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.9216, "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.7969, "eval_rougeL_for_task677_ollie_data_to_text": 31.2223, "eval_rougeL_for_task738_perspectrum_textual_entailment": 72.0, "eval_rougeL_for_task743_eurlex_title_generation": 30.9147, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1086, "eval_rougeL_for_task769_qed_title_generation": 81.7212, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 43.0, "eval_rougeL_for_task891_gap_coreference_resolution": 68.6, "eval_rougeL_for_task892_gap_coreference_resolution": 45.0, "eval_rougeL_for_task893_gap_coreference_resolution": 25.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 43.2957, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 43.1944, "eval_rougeL_for_title_generation": 34.951, "eval_rougeL_for_word_analogy": 45.5833, "eval_runtime": 931.3967, "eval_samples_per_second": 12.787, "eval_steps_per_second": 0.8, "step": 500 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 1.1112, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 30.7305, "eval_exact_match_for_answerability_classification": 51.1538, "eval_exact_match_for_cause_effect_classification": 36.7143, "eval_exact_match_for_coreference_resolution": 40.5, "eval_exact_match_for_data_to_text": 7.2639, "eval_exact_match_for_dialogue_act_recognition": 47.8571, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 37.2, "eval_exact_match_for_overlap_extraction": 11.0, "eval_exact_match_for_question_rewriting": 1.8182, "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, "eval_exact_match_for_task050_multirc_answerability_classification": 54.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 40.0, "eval_exact_match_for_task1153_bard_word_analogy": 37.0, "eval_exact_match_for_task1154_bard_word_analogy": 28.0, "eval_exact_match_for_task1155_bard_word_analogy": 77.0, "eval_exact_match_for_task1156_bard_word_analogy": 53.0, "eval_exact_match_for_task1157_bard_word_analogy": 55.0, "eval_exact_match_for_task1158_bard_word_analogy": 57.0, "eval_exact_match_for_task1159_bard_word_analogy": 31.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, "eval_exact_match_for_task1388_cb_textual_entailment": 34.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 72.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 52.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 53.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 41.0, "eval_exact_match_for_task1615_sick_textual_entailment": 41.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 54.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_exact_match_for_task1659_billsum_title_generation": 7.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 33.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 21.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 74.0, "eval_exact_match_for_task201_multinli_textual_entailment": 29.0, "eval_exact_match_for_task202_multinli_textual_entailment": 6.0, "eval_exact_match_for_task219_rocstories_title_generation": 5.0, "eval_exact_match_for_task220_rocstories_title_generation": 77.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_exact_match_for_task232_iirc_answerability_classification": 42.0, "eval_exact_match_for_task233_iirc_answerability_classification": 41.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 53.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 16.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 55.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 80.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 9.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 37.0, "eval_exact_match_for_task743_eurlex_title_generation": 3.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 71.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 57.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 38.0, "eval_exact_match_for_task891_gap_coreference_resolution": 57.0, "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 40.4167, "eval_exact_match_for_title_generation": 10.4821, "eval_exact_match_for_word_analogy": 47.25, "eval_f1": 48.5779, "eval_f1_for_answerability_classification": 53.7179, "eval_f1_for_cause_effect_classification": 57.306, "eval_f1_for_coreference_resolution": 48.1256, "eval_f1_for_data_to_text": 53.952, "eval_f1_for_dialogue_act_recognition": 51.4286, "eval_f1_for_grammar_error_correction": 57.798, "eval_f1_for_keyword_tagging": 52.1509, "eval_f1_for_overlap_extraction": 35.9316, "eval_f1_for_question_rewriting": 70.0244, "eval_f1_for_task020_mctaco_answerability_classification": 52.0, "eval_f1_for_task033_winogrande_coreference_resolution": 55.8333, "eval_f1_for_task034_winogrande_question_rewriting": 87.0844, "eval_f1_for_task035_winogrande_question_rewriting": 84.4687, "eval_f1_for_task036_qasc_keyword_tagging": 60.6455, "eval_f1_for_task039_qasc_overlap_extraction": 25.5, "eval_f1_for_task050_multirc_answerability_classification": 54.0, "eval_f1_for_task102_commongen_data_to_text": 55.8174, "eval_f1_for_task1152_bard_word_analogy": 40.0, "eval_f1_for_task1153_bard_word_analogy": 37.0, "eval_f1_for_task1154_bard_word_analogy": 28.0, "eval_f1_for_task1155_bard_word_analogy": 77.0, "eval_f1_for_task1156_bard_word_analogy": 54.3333, "eval_f1_for_task1157_bard_word_analogy": 55.0, "eval_f1_for_task1158_bard_word_analogy": 57.0, "eval_f1_for_task1159_bard_word_analogy": 31.0, "eval_f1_for_task1161_coda_19_title_generation": 37.0836, "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.2548, "eval_f1_for_task121_atomic_question_rewriting": 47.2439, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0808, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.0289, "eval_f1_for_task1356_xlsum_title_generation": 23.7967, "eval_f1_for_task1358_xlsum_title_generation": 33.4406, "eval_f1_for_task1385_anli_textual_entailment": 29.0, "eval_f1_for_task1386_anli_textual_entailment": 33.0, "eval_f1_for_task1387_anli_textual_entailment": 35.0, "eval_f1_for_task1388_cb_textual_entailment": 34.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 72.0, "eval_f1_for_task1407_dart_data_to_text": 36.7093, "eval_f1_for_task1409_dart_data_to_text": 50.8633, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.0468, "eval_f1_for_task1439_doqa_answerability_classification": 50.0, "eval_f1_for_task1442_doqa_answerability_classification": 50.0, "eval_f1_for_task1516_imppres_textual_entailment": 34.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 37.1959, "eval_f1_for_task1554_scitail_textual_entailment": 53.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5492, "eval_f1_for_task1562_zest_question_rewriting": 51.7781, "eval_f1_for_task1586_scifact_title_generation": 30.7352, "eval_f1_for_task1598_nyc_data_to_text": 53.7099, "eval_f1_for_task1612_sick_textual_entailment": 41.0, "eval_f1_for_task1615_sick_textual_entailment": 41.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.6058, "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.6581, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_f1_for_task1659_billsum_title_generation": 41.5197, "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.4762, "eval_f1_for_task1728_web_nlg_data_to_text": 62.6333, "eval_f1_for_task190_snli_textual_entailment": 21.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 74.0, "eval_f1_for_task201_multinli_textual_entailment": 29.0, "eval_f1_for_task202_multinli_textual_entailment": 6.0, "eval_f1_for_task219_rocstories_title_generation": 16.5839, "eval_f1_for_task220_rocstories_title_generation": 77.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_f1_for_task232_iirc_answerability_classification": 42.0, "eval_f1_for_task233_iirc_answerability_classification": 41.0, "eval_f1_for_task242_tweetqa_answerability_classification": 53.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.6381, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 46.3631, "eval_f1_for_task288_gigaword_title_generation": 30.169, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 50.2333, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 72.1667, "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 30.682, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 21.6667, "eval_f1_for_task402_grailqa_question_rewriting": 81.7719, "eval_f1_for_task418_persent_title_generation": 28.3586, "eval_f1_for_task442_com_qa_question_rewriting": 71.6756, "eval_f1_for_task500_scruples_title_generation": 13.7244, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.2878, "eval_f1_for_task520_aquamuse_answerability_classification": 55.0, "eval_f1_for_task569_recipe_nlg_title_generation": 41.167, "eval_f1_for_task602_wikitext_title_generation": 11.9233, "eval_f1_for_task613_liar_keyword_tagging": 19.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 46.7935, "eval_f1_for_task619_ohsumed_title_generation": 46.5426, "eval_f1_for_task620_ohsumed_keyword_tagging": 38.6333, "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.1424, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 10.0, "eval_f1_for_task670_ambigqa_question_rewriting": 81.8064, "eval_f1_for_task671_ambigqa_question_rewriting": 63.5495, "eval_f1_for_task677_ollie_data_to_text": 33.4824, "eval_f1_for_task738_perspectrum_textual_entailment": 37.0, "eval_f1_for_task743_eurlex_title_generation": 31.7522, "eval_f1_for_task760_msr_sqa_data_to_text": 2.1047, "eval_f1_for_task769_qed_title_generation": 83.481, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 57.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, "eval_f1_for_task890_gwsd_textual_entailment": 38.0, "eval_f1_for_task891_gap_coreference_resolution": 67.7444, "eval_f1_for_task892_gap_coreference_resolution": 52.0, "eval_f1_for_task893_gap_coreference_resolution": 27.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 56.2227, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 40.4167, "eval_f1_for_title_generation": 35.8147, "eval_f1_for_word_analogy": 47.4167, "eval_gen_len": 8.9394, "eval_global_step": 1000, "eval_loss": 1.0606060028076172, "eval_rouge1": 50.5819, "eval_rouge1_for_answerability_classification": 53.7179, "eval_rouge1_for_cause_effect_classification": 58.0995, "eval_rouge1_for_coreference_resolution": 48.6923, "eval_rouge1_for_data_to_text": 56.3961, "eval_rouge1_for_dialogue_act_recognition": 54.2381, "eval_rouge1_for_grammar_error_correction": 62.3067, "eval_rouge1_for_keyword_tagging": 56.8273, "eval_rouge1_for_overlap_extraction": 40.2241, "eval_rouge1_for_question_rewriting": 71.55, "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.7333, "eval_rouge1_for_task034_winogrande_question_rewriting": 87.238, "eval_rouge1_for_task035_winogrande_question_rewriting": 85.3159, "eval_rouge1_for_task036_qasc_keyword_tagging": 65.7558, "eval_rouge1_for_task039_qasc_overlap_extraction": 32.1667, "eval_rouge1_for_task050_multirc_answerability_classification": 54.0, "eval_rouge1_for_task102_commongen_data_to_text": 66.7771, "eval_rouge1_for_task1152_bard_word_analogy": 40.0, "eval_rouge1_for_task1153_bard_word_analogy": 37.0, "eval_rouge1_for_task1154_bard_word_analogy": 28.0, "eval_rouge1_for_task1155_bard_word_analogy": 77.0, "eval_rouge1_for_task1156_bard_word_analogy": 54.3333, "eval_rouge1_for_task1157_bard_word_analogy": 55.0, "eval_rouge1_for_task1158_bard_word_analogy": 57.0, "eval_rouge1_for_task1159_bard_word_analogy": 31.0, "eval_rouge1_for_task1161_coda_19_title_generation": 41.418, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.4327, "eval_rouge1_for_task121_atomic_question_rewriting": 49.3617, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5024, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 43.356, "eval_rouge1_for_task1356_xlsum_title_generation": 27.6062, "eval_rouge1_for_task1358_xlsum_title_generation": 37.9999, "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 34.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 74.0, "eval_rouge1_for_task1407_dart_data_to_text": 37.4821, "eval_rouge1_for_task1409_dart_data_to_text": 51.3691, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.2167, "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 44.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 40.4686, "eval_rouge1_for_task1554_scitail_textual_entailment": 53.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.3967, "eval_rouge1_for_task1562_zest_question_rewriting": 54.5844, "eval_rouge1_for_task1586_scifact_title_generation": 34.7672, "eval_rouge1_for_task1598_nyc_data_to_text": 54.6547, "eval_rouge1_for_task1612_sick_textual_entailment": 41.0, "eval_rouge1_for_task1615_sick_textual_entailment": 80.3333, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.8237, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.6617, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rouge1_for_task1659_billsum_title_generation": 42.8348, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.4762, "eval_rouge1_for_task1728_web_nlg_data_to_text": 64.2742, "eval_rouge1_for_task190_snli_textual_entailment": 21.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 74.0, "eval_rouge1_for_task201_multinli_textual_entailment": 29.0, "eval_rouge1_for_task202_multinli_textual_entailment": 6.0, "eval_rouge1_for_task219_rocstories_title_generation": 22.8807, "eval_rouge1_for_task220_rocstories_title_generation": 77.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rouge1_for_task232_iirc_answerability_classification": 42.0, "eval_rouge1_for_task233_iirc_answerability_classification": 41.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 53.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 63.4833, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 48.2814, "eval_rouge1_for_task288_gigaword_title_generation": 33.384, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 51.2333, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 72.5667, "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.8596, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 26.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.3428, "eval_rouge1_for_task418_persent_title_generation": 32.0061, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.9547, "eval_rouge1_for_task500_scruples_title_generation": 15.3533, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.7478, "eval_rouge1_for_task520_aquamuse_answerability_classification": 55.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.5453, "eval_rouge1_for_task602_wikitext_title_generation": 12.5074, "eval_rouge1_for_task613_liar_keyword_tagging": 33.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 52.1703, "eval_rouge1_for_task619_ohsumed_title_generation": 49.9808, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.9524, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.9281, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 10.5, "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.3694, "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.2704, "eval_rouge1_for_task677_ollie_data_to_text": 36.1905, "eval_rouge1_for_task738_perspectrum_textual_entailment": 82.0, "eval_rouge1_for_task743_eurlex_title_generation": 33.1385, "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.3921, "eval_rouge1_for_task769_qed_title_generation": 83.0345, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 57.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 38.0, "eval_rouge1_for_task891_gap_coreference_resolution": 68.0333, "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.8009, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 43.9306, "eval_rouge1_for_title_generation": 38.2385, "eval_rouge1_for_word_analogy": 47.4167, "eval_rougeL": 49.1466, "eval_rougeL_for_answerability_classification": 53.7179, "eval_rougeL_for_cause_effect_classification": 57.2797, "eval_rougeL_for_coreference_resolution": 48.6923, "eval_rougeL_for_data_to_text": 48.6683, "eval_rougeL_for_dialogue_act_recognition": 54.2381, "eval_rougeL_for_grammar_error_correction": 61.5247, "eval_rougeL_for_keyword_tagging": 56.4752, "eval_rougeL_for_overlap_extraction": 39.3237, "eval_rougeL_for_question_rewriting": 67.7309, "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.7333, "eval_rougeL_for_task034_winogrande_question_rewriting": 86.584, "eval_rougeL_for_task035_winogrande_question_rewriting": 83.974, "eval_rougeL_for_task036_qasc_keyword_tagging": 65.2289, "eval_rougeL_for_task039_qasc_overlap_extraction": 32.1667, "eval_rougeL_for_task050_multirc_answerability_classification": 54.0, "eval_rougeL_for_task102_commongen_data_to_text": 59.5897, "eval_rougeL_for_task1152_bard_word_analogy": 40.0, "eval_rougeL_for_task1153_bard_word_analogy": 37.0, "eval_rougeL_for_task1154_bard_word_analogy": 28.0, "eval_rougeL_for_task1155_bard_word_analogy": 77.0, "eval_rougeL_for_task1156_bard_word_analogy": 54.3333, "eval_rougeL_for_task1157_bard_word_analogy": 55.0, "eval_rougeL_for_task1158_bard_word_analogy": 57.0, "eval_rougeL_for_task1159_bard_word_analogy": 31.0, "eval_rougeL_for_task1161_coda_19_title_generation": 33.7282, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.6901, "eval_rougeL_for_task121_atomic_question_rewriting": 44.6053, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.0909, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.2096, "eval_rougeL_for_task1356_xlsum_title_generation": 23.7572, "eval_rougeL_for_task1358_xlsum_title_generation": 31.9112, "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 34.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 74.0, "eval_rougeL_for_task1407_dart_data_to_text": 31.6192, "eval_rougeL_for_task1409_dart_data_to_text": 43.5298, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.5175, "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 44.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 52.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 36.7682, "eval_rougeL_for_task1554_scitail_textual_entailment": 53.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.5318, "eval_rougeL_for_task1562_zest_question_rewriting": 48.1823, "eval_rougeL_for_task1586_scifact_title_generation": 28.1638, "eval_rougeL_for_task1598_nyc_data_to_text": 41.2446, "eval_rougeL_for_task1612_sick_textual_entailment": 41.0, "eval_rougeL_for_task1615_sick_textual_entailment": 80.3333, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.601, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.1549, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rougeL_for_task1659_billsum_title_generation": 38.3898, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.4762, "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.4853, "eval_rougeL_for_task190_snli_textual_entailment": 21.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 74.0, "eval_rougeL_for_task201_multinli_textual_entailment": 29.0, "eval_rougeL_for_task202_multinli_textual_entailment": 6.0, "eval_rougeL_for_task219_rocstories_title_generation": 22.8807, "eval_rougeL_for_task220_rocstories_title_generation": 77.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rougeL_for_task232_iirc_answerability_classification": 42.0, "eval_rougeL_for_task233_iirc_answerability_classification": 41.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 53.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 63.4833, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 46.4808, "eval_rougeL_for_task288_gigaword_title_generation": 30.0212, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 51.2333, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 72.5667, "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.1422, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 26.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.2238, "eval_rougeL_for_task418_persent_title_generation": 28.0254, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.7026, "eval_rougeL_for_task500_scruples_title_generation": 13.7454, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.4938, "eval_rougeL_for_task520_aquamuse_answerability_classification": 55.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.3453, "eval_rougeL_for_task602_wikitext_title_generation": 12.2693, "eval_rougeL_for_task613_liar_keyword_tagging": 33.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 47.1493, "eval_rougeL_for_task619_ohsumed_title_generation": 41.942, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.719, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.9281, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 10.5, "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.7803, "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.4872, "eval_rougeL_for_task677_ollie_data_to_text": 30.0699, "eval_rougeL_for_task738_perspectrum_textual_entailment": 82.0, "eval_rougeL_for_task743_eurlex_title_generation": 30.3241, "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.3548, "eval_rougeL_for_task769_qed_title_generation": 83.0345, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 57.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 38.0, "eval_rougeL_for_task891_gap_coreference_resolution": 68.0333, "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.6946, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 43.9306, "eval_rougeL_for_title_generation": 35.1978, "eval_rougeL_for_word_analogy": 47.4167, "eval_runtime": 858.093, "eval_samples_per_second": 13.88, "eval_steps_per_second": 0.868, "step": 1000 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 1.0615, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 30.4198, "eval_exact_match_for_answerability_classification": 46.6923, "eval_exact_match_for_cause_effect_classification": 39.7143, "eval_exact_match_for_coreference_resolution": 39.1429, "eval_exact_match_for_data_to_text": 8.4746, "eval_exact_match_for_dialogue_act_recognition": 50.1429, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 37.0, "eval_exact_match_for_overlap_extraction": 11.0, "eval_exact_match_for_question_rewriting": 2.5455, "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 5.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, "eval_exact_match_for_task050_multirc_answerability_classification": 55.0, "eval_exact_match_for_task102_commongen_data_to_text": 1.0, "eval_exact_match_for_task1152_bard_word_analogy": 34.0, "eval_exact_match_for_task1153_bard_word_analogy": 28.0, "eval_exact_match_for_task1154_bard_word_analogy": 25.0, "eval_exact_match_for_task1155_bard_word_analogy": 72.0, "eval_exact_match_for_task1156_bard_word_analogy": 52.0, "eval_exact_match_for_task1157_bard_word_analogy": 51.0, "eval_exact_match_for_task1158_bard_word_analogy": 54.0, "eval_exact_match_for_task1159_bard_word_analogy": 46.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 52.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 68.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, "eval_exact_match_for_task1386_anli_textual_entailment": 37.0, "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, "eval_exact_match_for_task1388_cb_textual_entailment": 43.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 46.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 56.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 62.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 38.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 65.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 35.0, "eval_exact_match_for_task1615_sick_textual_entailment": 43.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 64.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_exact_match_for_task1659_billsum_title_generation": 2.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 10.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 47.0, "eval_exact_match_for_task200_multinli_textual_entailment": 50.0, "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, "eval_exact_match_for_task202_multinli_textual_entailment": 12.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 82.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_exact_match_for_task232_iirc_answerability_classification": 25.0, "eval_exact_match_for_task233_iirc_answerability_classification": 20.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 53.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 43.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_exact_match_for_task329_gap_coreference_resolution": 43.0, "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 2.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 65.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 46.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 24.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 27.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 70.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 72.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, "eval_exact_match_for_task891_gap_coreference_resolution": 59.0, "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, "eval_exact_match_for_task893_gap_coreference_resolution": 29.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 40.9167, "eval_exact_match_for_title_generation": 9.9215, "eval_exact_match_for_word_analogy": 45.25, "eval_f1": 48.6056, "eval_f1_for_answerability_classification": 49.2564, "eval_f1_for_cause_effect_classification": 60.6119, "eval_f1_for_coreference_resolution": 46.8398, "eval_f1_for_data_to_text": 54.4895, "eval_f1_for_dialogue_act_recognition": 53.7143, "eval_f1_for_grammar_error_correction": 56.9508, "eval_f1_for_keyword_tagging": 53.5841, "eval_f1_for_overlap_extraction": 41.4205, "eval_f1_for_question_rewriting": 70.5789, "eval_f1_for_task020_mctaco_answerability_classification": 56.0, "eval_f1_for_task033_winogrande_coreference_resolution": 51.3333, "eval_f1_for_task034_winogrande_question_rewriting": 85.1885, "eval_f1_for_task035_winogrande_question_rewriting": 89.014, "eval_f1_for_task036_qasc_keyword_tagging": 53.1257, "eval_f1_for_task039_qasc_overlap_extraction": 28.6333, "eval_f1_for_task050_multirc_answerability_classification": 55.0, "eval_f1_for_task102_commongen_data_to_text": 55.9473, "eval_f1_for_task1152_bard_word_analogy": 34.0, "eval_f1_for_task1153_bard_word_analogy": 28.0, "eval_f1_for_task1154_bard_word_analogy": 25.0, "eval_f1_for_task1155_bard_word_analogy": 72.0, "eval_f1_for_task1156_bard_word_analogy": 52.0, "eval_f1_for_task1157_bard_word_analogy": 51.0, "eval_f1_for_task1158_bard_word_analogy": 54.0, "eval_f1_for_task1159_bard_word_analogy": 46.0, "eval_f1_for_task1161_coda_19_title_generation": 38.2233, "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.6146, "eval_f1_for_task121_atomic_question_rewriting": 48.5533, "eval_f1_for_task133_winowhy_coreference_resolution": 52.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.4559, "eval_f1_for_task1344_rte_textual_entailment": 68.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.7273, "eval_f1_for_task1356_xlsum_title_generation": 22.3293, "eval_f1_for_task1358_xlsum_title_generation": 31.1355, "eval_f1_for_task1385_anli_textual_entailment": 30.0, "eval_f1_for_task1386_anli_textual_entailment": 37.0, "eval_f1_for_task1387_anli_textual_entailment": 33.0, "eval_f1_for_task1388_cb_textual_entailment": 43.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 46.0, "eval_f1_for_task1393_copa_cause_effect_classification": 56.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 62.0, "eval_f1_for_task1407_dart_data_to_text": 36.9905, "eval_f1_for_task1409_dart_data_to_text": 52.2075, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1894, "eval_f1_for_task1439_doqa_answerability_classification": 44.0, "eval_f1_for_task1442_doqa_answerability_classification": 52.0, "eval_f1_for_task1516_imppres_textual_entailment": 0.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 38.0, "eval_f1_for_task1540_peer_read_title_generation": 39.9613, "eval_f1_for_task1554_scitail_textual_entailment": 65.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7123, "eval_f1_for_task1562_zest_question_rewriting": 52.3113, "eval_f1_for_task1586_scifact_title_generation": 34.0363, "eval_f1_for_task1598_nyc_data_to_text": 52.8353, "eval_f1_for_task1612_sick_textual_entailment": 35.0, "eval_f1_for_task1615_sick_textual_entailment": 43.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.8641, "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_f1_for_task1631_open_pi_data_to_text": 96.557, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_f1_for_task1659_billsum_title_generation": 36.6419, "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.1905, "eval_f1_for_task1728_web_nlg_data_to_text": 61.3381, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 47.0, "eval_f1_for_task200_multinli_textual_entailment": 50.0, "eval_f1_for_task201_multinli_textual_entailment": 35.0, "eval_f1_for_task202_multinli_textual_entailment": 12.0, "eval_f1_for_task219_rocstories_title_generation": 20.6537, "eval_f1_for_task220_rocstories_title_generation": 82.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_f1_for_task232_iirc_answerability_classification": 25.0, "eval_f1_for_task233_iirc_answerability_classification": 20.0, "eval_f1_for_task242_tweetqa_answerability_classification": 53.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.2048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 54.2076, "eval_f1_for_task288_gigaword_title_generation": 30.6669, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 10.3333, "eval_f1_for_task329_gap_coreference_resolution": 43.0, "eval_f1_for_task330_gap_coreference_resolution": 68.9714, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.9492, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.8333, "eval_f1_for_task402_grailqa_question_rewriting": 79.5999, "eval_f1_for_task418_persent_title_generation": 26.9705, "eval_f1_for_task442_com_qa_question_rewriting": 72.3435, "eval_f1_for_task500_scruples_title_generation": 19.919, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.699, "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, "eval_f1_for_task569_recipe_nlg_title_generation": 38.7358, "eval_f1_for_task602_wikitext_title_generation": 11.7651, "eval_f1_for_task613_liar_keyword_tagging": 19.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 49.6673, "eval_f1_for_task619_ohsumed_title_generation": 46.3837, "eval_f1_for_task620_ohsumed_keyword_tagging": 37.1, "eval_f1_for_task623_ohsumed_keyword_tagging": 65.0, "eval_f1_for_task640_e_snli_textual_entailment": 35.0, "eval_f1_for_task641_e_snli_textual_entailment": 46.0, "eval_f1_for_task642_e_snli_textual_entailment": 49.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.3615, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 37.2381, "eval_f1_for_task670_ambigqa_question_rewriting": 80.6849, "eval_f1_for_task671_ambigqa_question_rewriting": 66.4667, "eval_f1_for_task677_ollie_data_to_text": 35.3271, "eval_f1_for_task738_perspectrum_textual_entailment": 27.0, "eval_f1_for_task743_eurlex_title_generation": 38.0284, "eval_f1_for_task760_msr_sqa_data_to_text": 2.9271, "eval_f1_for_task769_qed_title_generation": 85.7156, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 72.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_f1_for_task890_gwsd_textual_entailment": 42.0, "eval_f1_for_task891_gap_coreference_resolution": 68.6524, "eval_f1_for_task892_gap_coreference_resolution": 43.0, "eval_f1_for_task893_gap_coreference_resolution": 29.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_f1_for_task957_e2e_data_to_text": 58.1191, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 40.9167, "eval_f1_for_title_generation": 36.852, "eval_f1_for_word_analogy": 45.25, "eval_gen_len": 9.6371, "eval_global_step": 1500, "eval_loss": 1.1058847904205322, "eval_rouge1": 50.554, "eval_rouge1_for_answerability_classification": 49.2564, "eval_rouge1_for_cause_effect_classification": 61.4872, "eval_rouge1_for_coreference_resolution": 47.4798, "eval_rouge1_for_data_to_text": 57.6063, "eval_rouge1_for_dialogue_act_recognition": 55.2952, "eval_rouge1_for_grammar_error_correction": 61.8269, "eval_rouge1_for_keyword_tagging": 58.16, "eval_rouge1_for_overlap_extraction": 44.052, "eval_rouge1_for_question_rewriting": 72.1518, "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 51.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 85.3408, "eval_rouge1_for_task035_winogrande_question_rewriting": 89.6179, "eval_rouge1_for_task036_qasc_keyword_tagging": 57.7384, "eval_rouge1_for_task039_qasc_overlap_extraction": 33.0333, "eval_rouge1_for_task050_multirc_answerability_classification": 55.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.7864, "eval_rouge1_for_task1152_bard_word_analogy": 34.0, "eval_rouge1_for_task1153_bard_word_analogy": 28.0, "eval_rouge1_for_task1154_bard_word_analogy": 25.0, "eval_rouge1_for_task1155_bard_word_analogy": 72.0, "eval_rouge1_for_task1156_bard_word_analogy": 52.0, "eval_rouge1_for_task1157_bard_word_analogy": 51.0, "eval_rouge1_for_task1158_bard_word_analogy": 54.0, "eval_rouge1_for_task1159_bard_word_analogy": 46.0, "eval_rouge1_for_task1161_coda_19_title_generation": 41.8952, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.8089, "eval_rouge1_for_task121_atomic_question_rewriting": 50.949, "eval_rouge1_for_task133_winowhy_coreference_resolution": 52.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.1508, "eval_rouge1_for_task1344_rte_textual_entailment": 68.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 43.5425, "eval_rouge1_for_task1356_xlsum_title_generation": 25.6303, "eval_rouge1_for_task1358_xlsum_title_generation": 36.0322, "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, "eval_rouge1_for_task1386_anli_textual_entailment": 37.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 43.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 46.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 56.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.4, "eval_rouge1_for_task1407_dart_data_to_text": 38.3236, "eval_rouge1_for_task1409_dart_data_to_text": 53.2945, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8572, "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 38.0, "eval_rouge1_for_task1540_peer_read_title_generation": 43.2242, "eval_rouge1_for_task1554_scitail_textual_entailment": 65.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.7966, "eval_rouge1_for_task1562_zest_question_rewriting": 55.0989, "eval_rouge1_for_task1586_scifact_title_generation": 38.4258, "eval_rouge1_for_task1598_nyc_data_to_text": 54.7392, "eval_rouge1_for_task1612_sick_textual_entailment": 35.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.1543, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 96.6747, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.8958, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.1429, "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.8739, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 47.0, "eval_rouge1_for_task200_multinli_textual_entailment": 50.0, "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, "eval_rouge1_for_task202_multinli_textual_entailment": 12.0, "eval_rouge1_for_task219_rocstories_title_generation": 24.257, "eval_rouge1_for_task220_rocstories_title_generation": 82.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_rouge1_for_task232_iirc_answerability_classification": 25.0, "eval_rouge1_for_task233_iirc_answerability_classification": 20.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 53.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.05, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 55.0707, "eval_rouge1_for_task288_gigaword_title_generation": 33.2259, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 10.9, "eval_rouge1_for_task329_gap_coreference_resolution": 43.0, "eval_rouge1_for_task330_gap_coreference_resolution": 68.9, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.7228, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 40.5, "eval_rouge1_for_task402_grailqa_question_rewriting": 82.6141, "eval_rouge1_for_task418_persent_title_generation": 30.6812, "eval_rouge1_for_task442_com_qa_question_rewriting": 75.6684, "eval_rouge1_for_task500_scruples_title_generation": 21.8603, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.493, "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.2419, "eval_rouge1_for_task602_wikitext_title_generation": 12.4688, "eval_rouge1_for_task613_liar_keyword_tagging": 30.6667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 55.0209, "eval_rouge1_for_task619_ohsumed_title_generation": 49.4879, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.5333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 65.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 46.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.8615, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 37.2381, "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.4762, "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.3986, "eval_rouge1_for_task677_ollie_data_to_text": 38.3039, "eval_rouge1_for_task738_perspectrum_textual_entailment": 72.0, "eval_rouge1_for_task743_eurlex_title_generation": 39.6967, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.1863, "eval_rouge1_for_task769_qed_title_generation": 85.7244, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 72.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 51.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, "eval_rouge1_for_task891_gap_coreference_resolution": 68.6524, "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, "eval_rouge1_for_task893_gap_coreference_resolution": 29.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rouge1_for_task957_e2e_data_to_text": 60.003, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 44.375, "eval_rouge1_for_title_generation": 39.2038, "eval_rouge1_for_word_analogy": 45.25, "eval_rougeL": 49.033, "eval_rougeL_for_answerability_classification": 49.2564, "eval_rougeL_for_cause_effect_classification": 60.5755, "eval_rougeL_for_coreference_resolution": 47.4798, "eval_rougeL_for_data_to_text": 49.311, "eval_rougeL_for_dialogue_act_recognition": 55.2952, "eval_rougeL_for_grammar_error_correction": 61.1992, "eval_rougeL_for_keyword_tagging": 57.4827, "eval_rougeL_for_overlap_extraction": 42.7527, "eval_rougeL_for_question_rewriting": 68.6285, "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 51.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 84.573, "eval_rougeL_for_task035_winogrande_question_rewriting": 89.5554, "eval_rougeL_for_task036_qasc_keyword_tagging": 56.0853, "eval_rougeL_for_task039_qasc_overlap_extraction": 33.0333, "eval_rougeL_for_task050_multirc_answerability_classification": 55.0, "eval_rougeL_for_task102_commongen_data_to_text": 61.2535, "eval_rougeL_for_task1152_bard_word_analogy": 34.0, "eval_rougeL_for_task1153_bard_word_analogy": 28.0, "eval_rougeL_for_task1154_bard_word_analogy": 25.0, "eval_rougeL_for_task1155_bard_word_analogy": 72.0, "eval_rougeL_for_task1156_bard_word_analogy": 52.0, "eval_rougeL_for_task1157_bard_word_analogy": 51.0, "eval_rougeL_for_task1158_bard_word_analogy": 54.0, "eval_rougeL_for_task1159_bard_word_analogy": 46.0, "eval_rougeL_for_task1161_coda_19_title_generation": 34.302, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.2727, "eval_rougeL_for_task121_atomic_question_rewriting": 44.3672, "eval_rougeL_for_task133_winowhy_coreference_resolution": 52.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.4501, "eval_rougeL_for_task1344_rte_textual_entailment": 68.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8156, "eval_rougeL_for_task1356_xlsum_title_generation": 21.73, "eval_rougeL_for_task1358_xlsum_title_generation": 30.263, "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, "eval_rougeL_for_task1386_anli_textual_entailment": 37.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 43.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 46.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 56.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.4, "eval_rougeL_for_task1407_dart_data_to_text": 30.9807, "eval_rougeL_for_task1409_dart_data_to_text": 42.2536, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.4675, "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 38.0, "eval_rougeL_for_task1540_peer_read_title_generation": 38.4007, "eval_rougeL_for_task1554_scitail_textual_entailment": 65.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.9309, "eval_rougeL_for_task1562_zest_question_rewriting": 48.9947, "eval_rougeL_for_task1586_scifact_title_generation": 31.2982, "eval_rougeL_for_task1598_nyc_data_to_text": 42.0754, "eval_rougeL_for_task1612_sick_textual_entailment": 35.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.8103, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 96.1924, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, "eval_rougeL_for_task1659_billsum_title_generation": 33.2618, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.1429, "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.3864, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 47.0, "eval_rougeL_for_task200_multinli_textual_entailment": 50.0, "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, "eval_rougeL_for_task202_multinli_textual_entailment": 12.0, "eval_rougeL_for_task219_rocstories_title_generation": 24.257, "eval_rougeL_for_task220_rocstories_title_generation": 82.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_rougeL_for_task232_iirc_answerability_classification": 25.0, "eval_rougeL_for_task233_iirc_answerability_classification": 20.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 53.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.05, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 52.4722, "eval_rougeL_for_task288_gigaword_title_generation": 28.8764, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 10.9, "eval_rougeL_for_task329_gap_coreference_resolution": 43.0, "eval_rougeL_for_task330_gap_coreference_resolution": 68.9, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.9151, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 40.5, "eval_rougeL_for_task402_grailqa_question_rewriting": 68.0453, "eval_rougeL_for_task418_persent_title_generation": 27.3438, "eval_rougeL_for_task442_com_qa_question_rewriting": 71.5607, "eval_rougeL_for_task500_scruples_title_generation": 20.4894, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.1613, "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.5667, "eval_rougeL_for_task602_wikitext_title_generation": 12.32, "eval_rougeL_for_task613_liar_keyword_tagging": 30.6667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 49.4466, "eval_rougeL_for_task619_ohsumed_title_generation": 41.3596, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.8, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 65.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 46.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.8615, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 37.2381, "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.576, "eval_rougeL_for_task671_ambigqa_question_rewriting": 66.3431, "eval_rougeL_for_task677_ollie_data_to_text": 31.7829, "eval_rougeL_for_task738_perspectrum_textual_entailment": 72.0, "eval_rougeL_for_task743_eurlex_title_generation": 35.3113, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0976, "eval_rougeL_for_task769_qed_title_generation": 85.7244, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 72.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 51.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, "eval_rougeL_for_task891_gap_coreference_resolution": 68.6524, "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, "eval_rougeL_for_task893_gap_coreference_resolution": 29.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rougeL_for_task957_e2e_data_to_text": 46.5787, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 44.375, "eval_rougeL_for_title_generation": 35.8265, "eval_rougeL_for_word_analogy": 45.25, "eval_runtime": 920.3447, "eval_samples_per_second": 12.941, "eval_steps_per_second": 0.809, "step": 1500 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 1.0365, "step": 2000 }, { "epoch": 0.44, "eval_exact_match": 32.8547, "eval_exact_match_for_answerability_classification": 56.4615, "eval_exact_match_for_cause_effect_classification": 47.2857, "eval_exact_match_for_coreference_resolution": 41.2857, "eval_exact_match_for_data_to_text": 4.4794, "eval_exact_match_for_dialogue_act_recognition": 53.0, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 47.8, "eval_exact_match_for_overlap_extraction": 14.5, "eval_exact_match_for_question_rewriting": 3.2727, "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 23.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 29.0, "eval_exact_match_for_task050_multirc_answerability_classification": 72.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 38.0, "eval_exact_match_for_task1153_bard_word_analogy": 33.0, "eval_exact_match_for_task1154_bard_word_analogy": 23.0, "eval_exact_match_for_task1155_bard_word_analogy": 84.0, "eval_exact_match_for_task1156_bard_word_analogy": 66.0, "eval_exact_match_for_task1157_bard_word_analogy": 55.0, "eval_exact_match_for_task1158_bard_word_analogy": 42.0, "eval_exact_match_for_task1159_bard_word_analogy": 41.0, "eval_exact_match_for_task1161_coda_19_title_generation": 3.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 53.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 28.0, "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 54.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 69.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 73.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 2.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 43.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 56.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 27.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 33.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 41.0, "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 60.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 29.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_exact_match_for_task1659_billsum_title_generation": 2.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 7.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 42.0, "eval_exact_match_for_task200_multinli_textual_entailment": 79.0, "eval_exact_match_for_task201_multinli_textual_entailment": 22.0, "eval_exact_match_for_task202_multinli_textual_entailment": 5.0, "eval_exact_match_for_task219_rocstories_title_generation": 1.0, "eval_exact_match_for_task220_rocstories_title_generation": 94.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_exact_match_for_task232_iirc_answerability_classification": 46.0, "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 80.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 9.0, "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 70.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 68.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 37.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 2.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 62.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 92.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 22.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 29.0, "eval_exact_match_for_task743_eurlex_title_generation": 0.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 65.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 57.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 75.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 66.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 28.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, "eval_exact_match_for_task891_gap_coreference_resolution": 51.0, "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, "eval_exact_match_for_task893_gap_coreference_resolution": 32.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 69.0, "eval_exact_match_for_textual_entailment": 40.8333, "eval_exact_match_for_title_generation": 10.2578, "eval_exact_match_for_word_analogy": 47.75, "eval_f1": 50.1946, "eval_f1_for_answerability_classification": 59.0256, "eval_f1_for_cause_effect_classification": 63.4851, "eval_f1_for_coreference_resolution": 48.7063, "eval_f1_for_data_to_text": 52.8919, "eval_f1_for_dialogue_act_recognition": 55.1429, "eval_f1_for_grammar_error_correction": 56.7348, "eval_f1_for_keyword_tagging": 61.8279, "eval_f1_for_overlap_extraction": 26.0088, "eval_f1_for_question_rewriting": 69.7117, "eval_f1_for_task020_mctaco_answerability_classification": 52.0, "eval_f1_for_task033_winogrande_coreference_resolution": 52.6667, "eval_f1_for_task034_winogrande_question_rewriting": 88.617, "eval_f1_for_task035_winogrande_question_rewriting": 86.1451, "eval_f1_for_task036_qasc_keyword_tagging": 58.5966, "eval_f1_for_task039_qasc_overlap_extraction": 33.0, "eval_f1_for_task050_multirc_answerability_classification": 72.0, "eval_f1_for_task102_commongen_data_to_text": 55.3881, "eval_f1_for_task1152_bard_word_analogy": 38.0, "eval_f1_for_task1153_bard_word_analogy": 33.0, "eval_f1_for_task1154_bard_word_analogy": 23.0, "eval_f1_for_task1155_bard_word_analogy": 84.0, "eval_f1_for_task1156_bard_word_analogy": 66.6667, "eval_f1_for_task1157_bard_word_analogy": 55.0, "eval_f1_for_task1158_bard_word_analogy": 42.0, "eval_f1_for_task1159_bard_word_analogy": 41.0, "eval_f1_for_task1161_coda_19_title_generation": 36.7841, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.9189, "eval_f1_for_task121_atomic_question_rewriting": 50.162, "eval_f1_for_task133_winowhy_coreference_resolution": 53.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.6085, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.0971, "eval_f1_for_task1356_xlsum_title_generation": 22.9408, "eval_f1_for_task1358_xlsum_title_generation": 33.0266, "eval_f1_for_task1385_anli_textual_entailment": 28.0, "eval_f1_for_task1386_anli_textual_entailment": 35.0, "eval_f1_for_task1387_anli_textual_entailment": 35.0, "eval_f1_for_task1388_cb_textual_entailment": 40.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 54.0, "eval_f1_for_task1393_copa_cause_effect_classification": 69.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 73.0, "eval_f1_for_task1407_dart_data_to_text": 36.6885, "eval_f1_for_task1409_dart_data_to_text": 52.2731, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.4069, "eval_f1_for_task1439_doqa_answerability_classification": 43.0, "eval_f1_for_task1442_doqa_answerability_classification": 56.0, "eval_f1_for_task1516_imppres_textual_entailment": 27.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 33.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 36.2123, "eval_f1_for_task1554_scitail_textual_entailment": 58.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.0627, "eval_f1_for_task1562_zest_question_rewriting": 51.3686, "eval_f1_for_task1586_scifact_title_generation": 35.5966, "eval_f1_for_task1598_nyc_data_to_text": 52.5645, "eval_f1_for_task1612_sick_textual_entailment": 41.0, "eval_f1_for_task1615_sick_textual_entailment": 33.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.8871, "eval_f1_for_task1624_disfl_qa_answerability_classification": 60.0, "eval_f1_for_task1631_open_pi_data_to_text": 86.9713, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_f1_for_task1659_billsum_title_generation": 37.668, "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.181, "eval_f1_for_task1728_web_nlg_data_to_text": 60.9137, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 42.0, "eval_f1_for_task200_multinli_textual_entailment": 79.0, "eval_f1_for_task201_multinli_textual_entailment": 22.0, "eval_f1_for_task202_multinli_textual_entailment": 5.0, "eval_f1_for_task219_rocstories_title_generation": 19.526, "eval_f1_for_task220_rocstories_title_generation": 94.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_f1_for_task232_iirc_answerability_classification": 46.0, "eval_f1_for_task233_iirc_answerability_classification": 43.0, "eval_f1_for_task242_tweetqa_answerability_classification": 80.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 65.2167, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 19.0175, "eval_f1_for_task288_gigaword_title_generation": 29.6002, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 15.6, "eval_f1_for_task329_gap_coreference_resolution": 53.0, "eval_f1_for_task330_gap_coreference_resolution": 64.0571, "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 89.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 33.0109, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.0, "eval_f1_for_task402_grailqa_question_rewriting": 79.1495, "eval_f1_for_task418_persent_title_generation": 28.9889, "eval_f1_for_task442_com_qa_question_rewriting": 71.7301, "eval_f1_for_task500_scruples_title_generation": 17.6773, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.8557, "eval_f1_for_task520_aquamuse_answerability_classification": 62.0, "eval_f1_for_task569_recipe_nlg_title_generation": 41.0673, "eval_f1_for_task602_wikitext_title_generation": 15.0399, "eval_f1_for_task613_liar_keyword_tagging": 23.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 33.7182, "eval_f1_for_task619_ohsumed_title_generation": 45.1335, "eval_f1_for_task620_ohsumed_keyword_tagging": 41.0, "eval_f1_for_task623_ohsumed_keyword_tagging": 92.0, "eval_f1_for_task640_e_snli_textual_entailment": 34.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 39.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.8762, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 27.1333, "eval_f1_for_task670_ambigqa_question_rewriting": 81.3624, "eval_f1_for_task671_ambigqa_question_rewriting": 58.3907, "eval_f1_for_task677_ollie_data_to_text": 34.321, "eval_f1_for_task738_perspectrum_textual_entailment": 29.0, "eval_f1_for_task743_eurlex_title_generation": 38.6867, "eval_f1_for_task760_msr_sqa_data_to_text": 4.1366, "eval_f1_for_task769_qed_title_generation": 72.5458, "eval_f1_for_task827_copa_cause_effect_classification": 57.0, "eval_f1_for_task828_copa_cause_effect_classification": 75.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 66.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 28.0, "eval_f1_for_task890_gwsd_textual_entailment": 40.0, "eval_f1_for_task891_gap_coreference_resolution": 61.0333, "eval_f1_for_task892_gap_coreference_resolution": 49.0, "eval_f1_for_task893_gap_coreference_resolution": 32.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_f1_for_task957_e2e_data_to_text": 56.6913, "eval_f1_for_task970_sherliic_textual_entailment": 69.0, "eval_f1_for_textual_entailment": 40.8333, "eval_f1_for_title_generation": 36.8583, "eval_f1_for_word_analogy": 47.8333, "eval_gen_len": 8.8664, "eval_global_step": 2000, "eval_loss": 1.1055411100387573, "eval_rouge1": 52.3648, "eval_rouge1_for_answerability_classification": 59.0256, "eval_rouge1_for_cause_effect_classification": 64.0442, "eval_rouge1_for_coreference_resolution": 49.492, "eval_rouge1_for_data_to_text": 55.6999, "eval_rouge1_for_dialogue_act_recognition": 58.3435, "eval_rouge1_for_grammar_error_correction": 61.5676, "eval_rouge1_for_keyword_tagging": 66.9408, "eval_rouge1_for_overlap_extraction": 29.9877, "eval_rouge1_for_question_rewriting": 71.2311, "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 52.6667, "eval_rouge1_for_task034_winogrande_question_rewriting": 88.6501, "eval_rouge1_for_task035_winogrande_question_rewriting": 86.7212, "eval_rouge1_for_task036_qasc_keyword_tagging": 65.342, "eval_rouge1_for_task039_qasc_overlap_extraction": 40.0, "eval_rouge1_for_task050_multirc_answerability_classification": 72.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.3918, "eval_rouge1_for_task1152_bard_word_analogy": 38.0, "eval_rouge1_for_task1153_bard_word_analogy": 33.0, "eval_rouge1_for_task1154_bard_word_analogy": 23.0, "eval_rouge1_for_task1155_bard_word_analogy": 84.0, "eval_rouge1_for_task1156_bard_word_analogy": 66.6667, "eval_rouge1_for_task1157_bard_word_analogy": 55.0, "eval_rouge1_for_task1158_bard_word_analogy": 42.0, "eval_rouge1_for_task1159_bard_word_analogy": 42.0, "eval_rouge1_for_task1161_coda_19_title_generation": 40.4206, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.1866, "eval_rouge1_for_task121_atomic_question_rewriting": 52.5271, "eval_rouge1_for_task133_winowhy_coreference_resolution": 53.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.3179, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.3774, "eval_rouge1_for_task1356_xlsum_title_generation": 27.7525, "eval_rouge1_for_task1358_xlsum_title_generation": 37.8959, "eval_rouge1_for_task1385_anli_textual_entailment": 28.0, "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 54.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 69.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 75.0714, "eval_rouge1_for_task1407_dart_data_to_text": 37.2449, "eval_rouge1_for_task1409_dart_data_to_text": 53.1772, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3545, "eval_rouge1_for_task1439_doqa_answerability_classification": 43.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 56.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 27.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 44.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 40.054, "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.7808, "eval_rouge1_for_task1562_zest_question_rewriting": 54.0918, "eval_rouge1_for_task1586_scifact_title_generation": 40.4079, "eval_rouge1_for_task1598_nyc_data_to_text": 54.0514, "eval_rouge1_for_task1612_sick_textual_entailment": 41.0, "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.197, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 60.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 87.157, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_rouge1_for_task1659_billsum_title_generation": 40.0394, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.181, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.9558, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 42.0, "eval_rouge1_for_task200_multinli_textual_entailment": 79.0, "eval_rouge1_for_task201_multinli_textual_entailment": 22.0, "eval_rouge1_for_task202_multinli_textual_entailment": 5.0, "eval_rouge1_for_task219_rocstories_title_generation": 23.964, "eval_rouge1_for_task220_rocstories_title_generation": 94.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_rouge1_for_task232_iirc_answerability_classification": 46.0, "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 80.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.8833, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 19.9754, "eval_rouge1_for_task288_gigaword_title_generation": 32.9297, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 16.2667, "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, "eval_rouge1_for_task330_gap_coreference_resolution": 64.1571, "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 89.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.7317, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 81.0445, "eval_rouge1_for_task418_persent_title_generation": 32.9195, "eval_rouge1_for_task442_com_qa_question_rewriting": 75.2518, "eval_rouge1_for_task500_scruples_title_generation": 19.4886, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.5264, "eval_rouge1_for_task520_aquamuse_answerability_classification": 62.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.3676, "eval_rouge1_for_task602_wikitext_title_generation": 15.6201, "eval_rouge1_for_task613_liar_keyword_tagging": 36.1667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.9108, "eval_rouge1_for_task619_ohsumed_title_generation": 47.9049, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 46.5333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 92.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6619, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 28.7, "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.1221, "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.3725, "eval_rouge1_for_task677_ollie_data_to_text": 36.9672, "eval_rouge1_for_task738_perspectrum_textual_entailment": 76.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.2297, "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.5893, "eval_rouge1_for_task769_qed_title_generation": 72.5977, "eval_rouge1_for_task827_copa_cause_effect_classification": 57.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 75.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 66.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, "eval_rouge1_for_task891_gap_coreference_resolution": 61.3667, "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, "eval_rouge1_for_task893_gap_coreference_resolution": 32.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.9432, "eval_rouge1_for_task970_sherliic_textual_entailment": 69.0, "eval_rouge1_for_textual_entailment": 44.6528, "eval_rouge1_for_title_generation": 39.4023, "eval_rouge1_for_word_analogy": 47.9583, "eval_rougeL": 50.8539, "eval_rougeL_for_answerability_classification": 59.0256, "eval_rougeL_for_cause_effect_classification": 63.4737, "eval_rougeL_for_coreference_resolution": 49.492, "eval_rougeL_for_data_to_text": 47.6089, "eval_rougeL_for_dialogue_act_recognition": 58.3435, "eval_rougeL_for_grammar_error_correction": 60.8819, "eval_rougeL_for_keyword_tagging": 66.3451, "eval_rougeL_for_overlap_extraction": 29.8431, "eval_rougeL_for_question_rewriting": 67.368, "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 52.6667, "eval_rougeL_for_task034_winogrande_question_rewriting": 87.3646, "eval_rougeL_for_task035_winogrande_question_rewriting": 85.6625, "eval_rougeL_for_task036_qasc_keyword_tagging": 64.3471, "eval_rougeL_for_task039_qasc_overlap_extraction": 40.0, "eval_rougeL_for_task050_multirc_answerability_classification": 72.0, "eval_rougeL_for_task102_commongen_data_to_text": 62.4196, "eval_rougeL_for_task1152_bard_word_analogy": 38.0, "eval_rougeL_for_task1153_bard_word_analogy": 33.0, "eval_rougeL_for_task1154_bard_word_analogy": 23.0, "eval_rougeL_for_task1155_bard_word_analogy": 84.0, "eval_rougeL_for_task1156_bard_word_analogy": 66.6667, "eval_rougeL_for_task1157_bard_word_analogy": 55.0, "eval_rougeL_for_task1158_bard_word_analogy": 42.0, "eval_rougeL_for_task1159_bard_word_analogy": 42.0, "eval_rougeL_for_task1161_coda_19_title_generation": 33.7519, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.7837, "eval_rougeL_for_task121_atomic_question_rewriting": 46.8622, "eval_rougeL_for_task133_winowhy_coreference_resolution": 53.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.6981, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.3098, "eval_rougeL_for_task1356_xlsum_title_generation": 22.9569, "eval_rougeL_for_task1358_xlsum_title_generation": 32.1285, "eval_rougeL_for_task1385_anli_textual_entailment": 28.0, "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 54.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 69.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 75.0714, "eval_rougeL_for_task1407_dart_data_to_text": 30.9297, "eval_rougeL_for_task1409_dart_data_to_text": 45.6831, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9026, "eval_rougeL_for_task1439_doqa_answerability_classification": 43.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 56.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 27.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 44.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 35.5919, "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.8613, "eval_rougeL_for_task1562_zest_question_rewriting": 47.1829, "eval_rougeL_for_task1586_scifact_title_generation": 33.0565, "eval_rougeL_for_task1598_nyc_data_to_text": 42.2215, "eval_rougeL_for_task1612_sick_textual_entailment": 41.0, "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.4583, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 60.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 80.9868, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.2535, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.181, "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.7425, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 42.0, "eval_rougeL_for_task200_multinli_textual_entailment": 79.0, "eval_rougeL_for_task201_multinli_textual_entailment": 22.0, "eval_rougeL_for_task202_multinli_textual_entailment": 5.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.7418, "eval_rougeL_for_task220_rocstories_title_generation": 94.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_rougeL_for_task232_iirc_answerability_classification": 46.0, "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 80.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.8833, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 19.6862, "eval_rougeL_for_task288_gigaword_title_generation": 28.6248, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 16.2667, "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, "eval_rougeL_for_task330_gap_coreference_resolution": 64.1571, "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 89.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.7062, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 65.5661, "eval_rougeL_for_task418_persent_title_generation": 28.1081, "eval_rougeL_for_task442_com_qa_question_rewriting": 69.5747, "eval_rougeL_for_task500_scruples_title_generation": 18.3571, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.2191, "eval_rougeL_for_task520_aquamuse_answerability_classification": 62.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.6819, "eval_rougeL_for_task602_wikitext_title_generation": 15.6201, "eval_rougeL_for_task613_liar_keyword_tagging": 36.1667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 33.9431, "eval_rougeL_for_task619_ohsumed_title_generation": 39.4155, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.55, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 92.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.6619, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 28.7, "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.4146, "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.8684, "eval_rougeL_for_task677_ollie_data_to_text": 30.3198, "eval_rougeL_for_task738_perspectrum_textual_entailment": 76.0, "eval_rougeL_for_task743_eurlex_title_generation": 35.8967, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.9838, "eval_rougeL_for_task769_qed_title_generation": 72.3477, "eval_rougeL_for_task827_copa_cause_effect_classification": 57.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 75.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 66.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, "eval_rougeL_for_task891_gap_coreference_resolution": 61.3667, "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, "eval_rougeL_for_task893_gap_coreference_resolution": 32.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.911, "eval_rougeL_for_task970_sherliic_textual_entailment": 69.0, "eval_rougeL_for_textual_entailment": 44.6528, "eval_rougeL_for_title_generation": 35.9277, "eval_rougeL_for_word_analogy": 47.9583, "eval_runtime": 876.9259, "eval_samples_per_second": 13.582, "eval_steps_per_second": 0.85, "step": 2000 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 1.0061, "step": 2500 }, { "epoch": 0.55, "eval_exact_match": 30.9572, "eval_exact_match_for_answerability_classification": 54.0769, "eval_exact_match_for_cause_effect_classification": 43.8571, "eval_exact_match_for_coreference_resolution": 43.3571, "eval_exact_match_for_data_to_text": 7.385, "eval_exact_match_for_dialogue_act_recognition": 47.8571, "eval_exact_match_for_grammar_error_correction": 7.0, "eval_exact_match_for_keyword_tagging": 41.2, "eval_exact_match_for_overlap_extraction": 12.0, "eval_exact_match_for_question_rewriting": 2.0, "eval_exact_match_for_task020_mctaco_answerability_classification": 49.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 9.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 11.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 39.0, "eval_exact_match_for_task1153_bard_word_analogy": 34.0, "eval_exact_match_for_task1154_bard_word_analogy": 30.0, "eval_exact_match_for_task1155_bard_word_analogy": 85.0, "eval_exact_match_for_task1156_bard_word_analogy": 52.0, "eval_exact_match_for_task1157_bard_word_analogy": 57.0, "eval_exact_match_for_task1158_bard_word_analogy": 53.0, "eval_exact_match_for_task1159_bard_word_analogy": 31.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 54.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 2.0, "eval_exact_match_for_task1386_anli_textual_entailment": 10.0, "eval_exact_match_for_task1387_anli_textual_entailment": 8.0, "eval_exact_match_for_task1388_cb_textual_entailment": 1.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 52.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 62.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 36.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 44.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 54.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 54.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_exact_match_for_task1659_billsum_title_generation": 1.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 16.0, "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, "eval_exact_match_for_task200_multinli_textual_entailment": 78.0, "eval_exact_match_for_task201_multinli_textual_entailment": 26.0, "eval_exact_match_for_task202_multinli_textual_entailment": 2.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 75.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 55.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 60.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 33.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 69.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 68.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 2.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 37.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 32.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 42.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 65.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 78.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 68.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, "eval_exact_match_for_task893_gap_coreference_resolution": 51.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, "eval_exact_match_for_textual_entailment": 35.875, "eval_exact_match_for_title_generation": 9.3049, "eval_exact_match_for_word_analogy": 47.625, "eval_f1": 48.6069, "eval_f1_for_answerability_classification": 56.641, "eval_f1_for_cause_effect_classification": 64.1011, "eval_f1_for_coreference_resolution": 50.6162, "eval_f1_for_data_to_text": 52.6965, "eval_f1_for_dialogue_act_recognition": 50.7143, "eval_f1_for_grammar_error_correction": 58.1314, "eval_f1_for_keyword_tagging": 56.0981, "eval_f1_for_overlap_extraction": 35.9189, "eval_f1_for_question_rewriting": 70.3884, "eval_f1_for_task020_mctaco_answerability_classification": 49.0, "eval_f1_for_task033_winogrande_coreference_resolution": 60.1667, "eval_f1_for_task034_winogrande_question_rewriting": 90.3729, "eval_f1_for_task035_winogrande_question_rewriting": 84.9614, "eval_f1_for_task036_qasc_keyword_tagging": 59.1284, "eval_f1_for_task039_qasc_overlap_extraction": 26.5, "eval_f1_for_task050_multirc_answerability_classification": 61.0, "eval_f1_for_task102_commongen_data_to_text": 53.2782, "eval_f1_for_task1152_bard_word_analogy": 39.0, "eval_f1_for_task1153_bard_word_analogy": 34.0, "eval_f1_for_task1154_bard_word_analogy": 30.0, "eval_f1_for_task1155_bard_word_analogy": 85.0, "eval_f1_for_task1156_bard_word_analogy": 53.3333, "eval_f1_for_task1157_bard_word_analogy": 57.0, "eval_f1_for_task1158_bard_word_analogy": 53.0, "eval_f1_for_task1159_bard_word_analogy": 31.0, "eval_f1_for_task1161_coda_19_title_generation": 35.4176, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.1587, "eval_f1_for_task121_atomic_question_rewriting": 49.5499, "eval_f1_for_task133_winowhy_coreference_resolution": 54.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.5878, "eval_f1_for_task1344_rte_textual_entailment": 54.0, "eval_f1_for_task1345_qqp_question_rewriting": 41.2736, "eval_f1_for_task1356_xlsum_title_generation": 23.2025, "eval_f1_for_task1358_xlsum_title_generation": 33.4253, "eval_f1_for_task1385_anli_textual_entailment": 2.0, "eval_f1_for_task1386_anli_textual_entailment": 10.0, "eval_f1_for_task1387_anli_textual_entailment": 8.0, "eval_f1_for_task1388_cb_textual_entailment": 1.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 52.0, "eval_f1_for_task1393_copa_cause_effect_classification": 62.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 47.0, "eval_f1_for_task1407_dart_data_to_text": 32.8231, "eval_f1_for_task1409_dart_data_to_text": 51.634, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.3259, "eval_f1_for_task1439_doqa_answerability_classification": 46.0, "eval_f1_for_task1442_doqa_answerability_classification": 55.0, "eval_f1_for_task1516_imppres_textual_entailment": 36.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1540_peer_read_title_generation": 36.186, "eval_f1_for_task1554_scitail_textual_entailment": 58.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.937, "eval_f1_for_task1562_zest_question_rewriting": 46.9035, "eval_f1_for_task1586_scifact_title_generation": 35.9406, "eval_f1_for_task1598_nyc_data_to_text": 48.6891, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 44.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.0649, "eval_f1_for_task1624_disfl_qa_answerability_classification": 54.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.0866, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_f1_for_task1659_billsum_title_generation": 35.5745, "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.7619, "eval_f1_for_task1728_web_nlg_data_to_text": 64.8272, "eval_f1_for_task190_snli_textual_entailment": 16.0, "eval_f1_for_task199_multinli_textual_entailment": 45.0, "eval_f1_for_task200_multinli_textual_entailment": 78.0, "eval_f1_for_task201_multinli_textual_entailment": 26.0, "eval_f1_for_task202_multinli_textual_entailment": 2.0, "eval_f1_for_task219_rocstories_title_generation": 18.0475, "eval_f1_for_task220_rocstories_title_generation": 75.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_f1_for_task232_iirc_answerability_classification": 50.0, "eval_f1_for_task233_iirc_answerability_classification": 50.0, "eval_f1_for_task242_tweetqa_answerability_classification": 55.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 67.2881, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.3377, "eval_f1_for_task288_gigaword_title_generation": 28.6375, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 68.5524, "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 80.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 31.7112, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 35.9, "eval_f1_for_task402_grailqa_question_rewriting": 80.4494, "eval_f1_for_task418_persent_title_generation": 25.0612, "eval_f1_for_task442_com_qa_question_rewriting": 71.5387, "eval_f1_for_task500_scruples_title_generation": 14.8766, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.6089, "eval_f1_for_task520_aquamuse_answerability_classification": 69.0, "eval_f1_for_task569_recipe_nlg_title_generation": 38.2592, "eval_f1_for_task602_wikitext_title_generation": 14.2801, "eval_f1_for_task613_liar_keyword_tagging": 25.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 42.6631, "eval_f1_for_task619_ohsumed_title_generation": 40.5692, "eval_f1_for_task620_ohsumed_keyword_tagging": 37.4048, "eval_f1_for_task623_ohsumed_keyword_tagging": 68.0, "eval_f1_for_task640_e_snli_textual_entailment": 2.0, "eval_f1_for_task641_e_snli_textual_entailment": 37.0, "eval_f1_for_task642_e_snli_textual_entailment": 42.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 90.6238, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 49.3238, "eval_f1_for_task670_ambigqa_question_rewriting": 82.1091, "eval_f1_for_task671_ambigqa_question_rewriting": 69.8905, "eval_f1_for_task677_ollie_data_to_text": 32.9293, "eval_f1_for_task738_perspectrum_textual_entailment": 42.0, "eval_f1_for_task743_eurlex_title_generation": 39.0379, "eval_f1_for_task760_msr_sqa_data_to_text": 4.7913, "eval_f1_for_task769_qed_title_generation": 71.5198, "eval_f1_for_task827_copa_cause_effect_classification": 78.0, "eval_f1_for_task828_copa_cause_effect_classification": 68.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, "eval_f1_for_task890_gwsd_textual_entailment": 35.0, "eval_f1_for_task891_gap_coreference_resolution": 63.6333, "eval_f1_for_task892_gap_coreference_resolution": 52.0, "eval_f1_for_task893_gap_coreference_resolution": 51.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 54.7596, "eval_f1_for_task970_sherliic_textual_entailment": 54.0, "eval_f1_for_textual_entailment": 35.875, "eval_f1_for_title_generation": 34.6944, "eval_f1_for_word_analogy": 47.7917, "eval_gen_len": 9.0688, "eval_global_step": 2500, "eval_loss": 1.127640962600708, "eval_rouge1": 51.9785, "eval_rouge1_for_answerability_classification": 56.641, "eval_rouge1_for_cause_effect_classification": 65.0852, "eval_rouge1_for_coreference_resolution": 51.4741, "eval_rouge1_for_data_to_text": 55.3598, "eval_rouge1_for_dialogue_act_recognition": 54.0023, "eval_rouge1_for_grammar_error_correction": 63.0361, "eval_rouge1_for_keyword_tagging": 60.9879, "eval_rouge1_for_overlap_extraction": 39.4767, "eval_rouge1_for_question_rewriting": 71.9893, "eval_rouge1_for_task020_mctaco_answerability_classification": 49.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 60.0667, "eval_rouge1_for_task034_winogrande_question_rewriting": 90.4006, "eval_rouge1_for_task035_winogrande_question_rewriting": 85.7688, "eval_rouge1_for_task036_qasc_keyword_tagging": 62.411, "eval_rouge1_for_task039_qasc_overlap_extraction": 32.5, "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, "eval_rouge1_for_task102_commongen_data_to_text": 66.6081, "eval_rouge1_for_task1152_bard_word_analogy": 39.0, "eval_rouge1_for_task1153_bard_word_analogy": 34.0, "eval_rouge1_for_task1154_bard_word_analogy": 30.0, "eval_rouge1_for_task1155_bard_word_analogy": 85.0, "eval_rouge1_for_task1156_bard_word_analogy": 53.3333, "eval_rouge1_for_task1157_bard_word_analogy": 57.0, "eval_rouge1_for_task1158_bard_word_analogy": 53.0, "eval_rouge1_for_task1159_bard_word_analogy": 31.0, "eval_rouge1_for_task1161_coda_19_title_generation": 38.7732, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.4353, "eval_rouge1_for_task121_atomic_question_rewriting": 52.1766, "eval_rouge1_for_task133_winowhy_coreference_resolution": 54.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.0706, "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 44.2145, "eval_rouge1_for_task1356_xlsum_title_generation": 27.2687, "eval_rouge1_for_task1358_xlsum_title_generation": 37.7003, "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 52.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 62.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 52.3492, "eval_rouge1_for_task1407_dart_data_to_text": 33.5991, "eval_rouge1_for_task1409_dart_data_to_text": 52.4662, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.0241, "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 36.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1540_peer_read_title_generation": 39.744, "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0482, "eval_rouge1_for_task1562_zest_question_rewriting": 50.9732, "eval_rouge1_for_task1586_scifact_title_generation": 40.5942, "eval_rouge1_for_task1598_nyc_data_to_text": 49.0942, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.3333, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2717, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 54.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.1703, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.0377, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.7619, "eval_rouge1_for_task1728_web_nlg_data_to_text": 66.6523, "eval_rouge1_for_task190_snli_textual_entailment": 16.0, "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, "eval_rouge1_for_task200_multinli_textual_entailment": 78.0, "eval_rouge1_for_task201_multinli_textual_entailment": 26.0, "eval_rouge1_for_task202_multinli_textual_entailment": 2.0, "eval_rouge1_for_task219_rocstories_title_generation": 23.454, "eval_rouge1_for_task220_rocstories_title_generation": 75.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 55.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 68.1333, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.4533, "eval_rouge1_for_task288_gigaword_title_generation": 31.8753, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 68.819, "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 80.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.0695, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 45.5667, "eval_rouge1_for_task402_grailqa_question_rewriting": 82.6034, "eval_rouge1_for_task418_persent_title_generation": 28.9952, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.7278, "eval_rouge1_for_task500_scruples_title_generation": 17.0279, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.1174, "eval_rouge1_for_task520_aquamuse_answerability_classification": 69.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.7724, "eval_rouge1_for_task602_wikitext_title_generation": 15.3729, "eval_rouge1_for_task613_liar_keyword_tagging": 39.0, "eval_rouge1_for_task614_glucose_cause_effect_classification": 49.1935, "eval_rouge1_for_task619_ohsumed_title_generation": 44.5137, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.4048, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 68.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 36.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 37.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.1238, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 50.3238, "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.9064, "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.4036, "eval_rouge1_for_task677_ollie_data_to_text": 36.0278, "eval_rouge1_for_task738_perspectrum_textual_entailment": 81.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.5423, "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.8729, "eval_rouge1_for_task769_qed_title_generation": 71.8734, "eval_rouge1_for_task827_copa_cause_effect_classification": 78.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 68.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, "eval_rouge1_for_task891_gap_coreference_resolution": 63.9667, "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, "eval_rouge1_for_task893_gap_coreference_resolution": 51.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rouge1_for_task957_e2e_data_to_text": 56.3869, "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, "eval_rouge1_for_textual_entailment": 45.5556, "eval_rouge1_for_title_generation": 37.2911, "eval_rouge1_for_word_analogy": 47.7917, "eval_rougeL": 50.5361, "eval_rougeL_for_answerability_classification": 56.641, "eval_rougeL_for_cause_effect_classification": 64.0819, "eval_rougeL_for_coreference_resolution": 51.4741, "eval_rougeL_for_data_to_text": 47.7389, "eval_rougeL_for_dialogue_act_recognition": 54.0023, "eval_rougeL_for_grammar_error_correction": 62.2825, "eval_rougeL_for_keyword_tagging": 60.0735, "eval_rougeL_for_overlap_extraction": 38.6998, "eval_rougeL_for_question_rewriting": 68.4257, "eval_rougeL_for_task020_mctaco_answerability_classification": 49.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 60.0667, "eval_rougeL_for_task034_winogrande_question_rewriting": 90.4006, "eval_rougeL_for_task035_winogrande_question_rewriting": 84.5242, "eval_rougeL_for_task036_qasc_keyword_tagging": 59.8221, "eval_rougeL_for_task039_qasc_overlap_extraction": 32.5, "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.4767, "eval_rougeL_for_task1152_bard_word_analogy": 39.0, "eval_rougeL_for_task1153_bard_word_analogy": 34.0, "eval_rougeL_for_task1154_bard_word_analogy": 30.0, "eval_rougeL_for_task1155_bard_word_analogy": 85.0, "eval_rougeL_for_task1156_bard_word_analogy": 53.3333, "eval_rougeL_for_task1157_bard_word_analogy": 57.0, "eval_rougeL_for_task1158_bard_word_analogy": 53.0, "eval_rougeL_for_task1159_bard_word_analogy": 31.0, "eval_rougeL_for_task1161_coda_19_title_generation": 31.7659, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.8745, "eval_rougeL_for_task121_atomic_question_rewriting": 47.0123, "eval_rougeL_for_task133_winowhy_coreference_resolution": 54.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7273, "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8843, "eval_rougeL_for_task1356_xlsum_title_generation": 22.4521, "eval_rougeL_for_task1358_xlsum_title_generation": 31.6792, "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 52.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 62.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 52.3492, "eval_rougeL_for_task1407_dart_data_to_text": 28.7645, "eval_rougeL_for_task1409_dart_data_to_text": 42.9728, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.4739, "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 36.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.2371, "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0912, "eval_rougeL_for_task1562_zest_question_rewriting": 45.1223, "eval_rougeL_for_task1586_scifact_title_generation": 33.5952, "eval_rougeL_for_task1598_nyc_data_to_text": 38.5678, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.3333, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.9749, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 54.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 94.2917, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rougeL_for_task1659_billsum_title_generation": 32.0191, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.7619, "eval_rougeL_for_task1728_web_nlg_data_to_text": 57.5557, "eval_rougeL_for_task190_snli_textual_entailment": 16.0, "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, "eval_rougeL_for_task200_multinli_textual_entailment": 78.0, "eval_rougeL_for_task201_multinli_textual_entailment": 26.0, "eval_rougeL_for_task202_multinli_textual_entailment": 2.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.454, "eval_rougeL_for_task220_rocstories_title_generation": 75.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 55.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 68.1333, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.8996, "eval_rougeL_for_task288_gigaword_title_generation": 27.7812, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 68.819, "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 80.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.563, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 45.5667, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.5365, "eval_rougeL_for_task418_persent_title_generation": 25.7476, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8325, "eval_rougeL_for_task500_scruples_title_generation": 15.8632, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.9221, "eval_rougeL_for_task520_aquamuse_answerability_classification": 69.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.2551, "eval_rougeL_for_task602_wikitext_title_generation": 15.3729, "eval_rougeL_for_task613_liar_keyword_tagging": 39.0, "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.6774, "eval_rougeL_for_task619_ohsumed_title_generation": 37.8918, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.4214, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 68.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 36.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 37.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 91.1238, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 50.3238, "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.1105, "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.4101, "eval_rougeL_for_task677_ollie_data_to_text": 30.455, "eval_rougeL_for_task738_perspectrum_textual_entailment": 81.0, "eval_rougeL_for_task743_eurlex_title_generation": 36.362, "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.323, "eval_rougeL_for_task769_qed_title_generation": 71.6234, "eval_rougeL_for_task827_copa_cause_effect_classification": 78.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 68.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, "eval_rougeL_for_task891_gap_coreference_resolution": 63.9667, "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, "eval_rougeL_for_task893_gap_coreference_resolution": 51.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.1149, "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, "eval_rougeL_for_textual_entailment": 45.5556, "eval_rougeL_for_title_generation": 34.209, "eval_rougeL_for_word_analogy": 47.7917, "eval_runtime": 878.4904, "eval_samples_per_second": 13.557, "eval_steps_per_second": 0.848, "step": 2500 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 0.988, "step": 3000 }, { "epoch": 0.66, "eval_exact_match": 32.0739, "eval_exact_match_for_answerability_classification": 58.3077, "eval_exact_match_for_cause_effect_classification": 38.8571, "eval_exact_match_for_coreference_resolution": 43.0714, "eval_exact_match_for_data_to_text": 5.6901, "eval_exact_match_for_dialogue_act_recognition": 50.0, "eval_exact_match_for_grammar_error_correction": 7.5, "eval_exact_match_for_keyword_tagging": 44.2, "eval_exact_match_for_overlap_extraction": 11.5, "eval_exact_match_for_question_rewriting": 2.4545, "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 52.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 13.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, "eval_exact_match_for_task050_multirc_answerability_classification": 81.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 47.0, "eval_exact_match_for_task1153_bard_word_analogy": 36.0, "eval_exact_match_for_task1154_bard_word_analogy": 36.0, "eval_exact_match_for_task1155_bard_word_analogy": 83.0, "eval_exact_match_for_task1156_bard_word_analogy": 58.0, "eval_exact_match_for_task1157_bard_word_analogy": 52.0, "eval_exact_match_for_task1158_bard_word_analogy": 49.0, "eval_exact_match_for_task1159_bard_word_analogy": 29.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 51.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 7.0, "eval_exact_match_for_task1386_anli_textual_entailment": 12.0, "eval_exact_match_for_task1387_anli_textual_entailment": 11.0, "eval_exact_match_for_task1388_cb_textual_entailment": 2.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 54.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 62.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 56.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 2.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 45.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 45.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 70.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 84.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 45.0, "eval_exact_match_for_task1615_sick_textual_entailment": 44.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 59.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 41.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 55.0, "eval_exact_match_for_task1659_billsum_title_generation": 4.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 44.0, "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, "eval_exact_match_for_task201_multinli_textual_entailment": 16.0, "eval_exact_match_for_task202_multinli_textual_entailment": 3.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 82.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, "eval_exact_match_for_task232_iirc_answerability_classification": 53.0, "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 93.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 51.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 48.0, "eval_exact_match_for_task330_gap_coreference_resolution": 68.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 62.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 90.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 36.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 44.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 51.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 35.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 73.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, "eval_exact_match_for_task891_gap_coreference_resolution": 59.0, "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, "eval_exact_match_for_textual_entailment": 38.75, "eval_exact_match_for_title_generation": 10.3139, "eval_exact_match_for_word_analogy": 48.75, "eval_f1": 49.7517, "eval_f1_for_answerability_classification": 60.8718, "eval_f1_for_cause_effect_classification": 58.5428, "eval_f1_for_coreference_resolution": 51.0866, "eval_f1_for_data_to_text": 52.3755, "eval_f1_for_dialogue_act_recognition": 52.7143, "eval_f1_for_grammar_error_correction": 56.904, "eval_f1_for_keyword_tagging": 60.6229, "eval_f1_for_overlap_extraction": 33.2525, "eval_f1_for_question_rewriting": 70.2848, "eval_f1_for_task020_mctaco_answerability_classification": 56.0, "eval_f1_for_task033_winogrande_coreference_resolution": 53.3333, "eval_f1_for_task034_winogrande_question_rewriting": 88.0195, "eval_f1_for_task035_winogrande_question_rewriting": 82.8009, "eval_f1_for_task036_qasc_keyword_tagging": 64.1708, "eval_f1_for_task039_qasc_overlap_extraction": 24.8333, "eval_f1_for_task050_multirc_answerability_classification": 81.0, "eval_f1_for_task102_commongen_data_to_text": 53.6856, "eval_f1_for_task1152_bard_word_analogy": 47.0, "eval_f1_for_task1153_bard_word_analogy": 36.0, "eval_f1_for_task1154_bard_word_analogy": 36.0, "eval_f1_for_task1155_bard_word_analogy": 83.0, "eval_f1_for_task1156_bard_word_analogy": 58.6667, "eval_f1_for_task1157_bard_word_analogy": 52.0, "eval_f1_for_task1158_bard_word_analogy": 49.0, "eval_f1_for_task1159_bard_word_analogy": 31.6667, "eval_f1_for_task1161_coda_19_title_generation": 36.9217, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.7475, "eval_f1_for_task121_atomic_question_rewriting": 51.2321, "eval_f1_for_task133_winowhy_coreference_resolution": 51.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.3221, "eval_f1_for_task1344_rte_textual_entailment": 51.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.226, "eval_f1_for_task1356_xlsum_title_generation": 20.3803, "eval_f1_for_task1358_xlsum_title_generation": 33.3315, "eval_f1_for_task1385_anli_textual_entailment": 7.0, "eval_f1_for_task1386_anli_textual_entailment": 12.0, "eval_f1_for_task1387_anli_textual_entailment": 11.0, "eval_f1_for_task1388_cb_textual_entailment": 2.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 54.0, "eval_f1_for_task1393_copa_cause_effect_classification": 62.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 56.0, "eval_f1_for_task1407_dart_data_to_text": 34.7818, "eval_f1_for_task1409_dart_data_to_text": 52.7903, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.7199, "eval_f1_for_task1439_doqa_answerability_classification": 45.0, "eval_f1_for_task1442_doqa_answerability_classification": 58.0, "eval_f1_for_task1516_imppres_textual_entailment": 45.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 70.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 36.1245, "eval_f1_for_task1554_scitail_textual_entailment": 84.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.0881, "eval_f1_for_task1562_zest_question_rewriting": 50.3109, "eval_f1_for_task1586_scifact_title_generation": 37.5252, "eval_f1_for_task1598_nyc_data_to_text": 51.7437, "eval_f1_for_task1612_sick_textual_entailment": 45.0, "eval_f1_for_task1615_sick_textual_entailment": 44.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.0159, "eval_f1_for_task1624_disfl_qa_answerability_classification": 59.0, "eval_f1_for_task1631_open_pi_data_to_text": 91.933, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 55.0, "eval_f1_for_task1659_billsum_title_generation": 37.0838, "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.1333, "eval_f1_for_task1728_web_nlg_data_to_text": 60.725, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 44.0, "eval_f1_for_task200_multinli_textual_entailment": 84.0, "eval_f1_for_task201_multinli_textual_entailment": 16.0, "eval_f1_for_task202_multinli_textual_entailment": 3.0, "eval_f1_for_task219_rocstories_title_generation": 14.1205, "eval_f1_for_task220_rocstories_title_generation": 82.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, "eval_f1_for_task232_iirc_answerability_classification": 53.0, "eval_f1_for_task233_iirc_answerability_classification": 50.0, "eval_f1_for_task242_tweetqa_answerability_classification": 93.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.0548, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.6717, "eval_f1_for_task288_gigaword_title_generation": 29.6567, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 9.3333, "eval_f1_for_task329_gap_coreference_resolution": 48.0, "eval_f1_for_task330_gap_coreference_resolution": 74.3905, "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 81.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 30.8376, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 37.0, "eval_f1_for_task402_grailqa_question_rewriting": 79.2214, "eval_f1_for_task418_persent_title_generation": 24.7467, "eval_f1_for_task442_com_qa_question_rewriting": 70.6665, "eval_f1_for_task500_scruples_title_generation": 13.1338, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.4987, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 36.3311, "eval_f1_for_task602_wikitext_title_generation": 15.9811, "eval_f1_for_task613_liar_keyword_tagging": 22.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 40.2956, "eval_f1_for_task619_ohsumed_title_generation": 43.2189, "eval_f1_for_task620_ohsumed_keyword_tagging": 32.8152, "eval_f1_for_task623_ohsumed_keyword_tagging": 90.0, "eval_f1_for_task640_e_snli_textual_entailment": 36.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 44.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.7952, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 64.2667, "eval_f1_for_task670_ambigqa_question_rewriting": 82.3235, "eval_f1_for_task671_ambigqa_question_rewriting": 68.5685, "eval_f1_for_task677_ollie_data_to_text": 32.8628, "eval_f1_for_task738_perspectrum_textual_entailment": 35.0, "eval_f1_for_task743_eurlex_title_generation": 34.3783, "eval_f1_for_task760_msr_sqa_data_to_text": 3.4617, "eval_f1_for_task769_qed_title_generation": 81.3651, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 60.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, "eval_f1_for_task890_gwsd_textual_entailment": 44.0, "eval_f1_for_task891_gap_coreference_resolution": 67.7, "eval_f1_for_task892_gap_coreference_resolution": 50.0, "eval_f1_for_task893_gap_coreference_resolution": 31.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 53.1994, "eval_f1_for_task970_sherliic_textual_entailment": 71.0, "eval_f1_for_textual_entailment": 38.75, "eval_f1_for_title_generation": 35.1773, "eval_f1_for_word_analogy": 49.1667, "eval_gen_len": 8.9161, "eval_global_step": 3000, "eval_loss": 1.1237024068832397, "eval_rouge1": 52.5211, "eval_rouge1_for_answerability_classification": 60.8718, "eval_rouge1_for_cause_effect_classification": 59.1346, "eval_rouge1_for_coreference_resolution": 51.7966, "eval_rouge1_for_data_to_text": 55.5616, "eval_rouge1_for_dialogue_act_recognition": 56.8968, "eval_rouge1_for_grammar_error_correction": 61.4727, "eval_rouge1_for_keyword_tagging": 65.1068, "eval_rouge1_for_overlap_extraction": 36.2045, "eval_rouge1_for_question_rewriting": 71.9328, "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 53.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 88.0628, "eval_rouge1_for_task035_winogrande_question_rewriting": 83.7817, "eval_rouge1_for_task036_qasc_keyword_tagging": 65.5859, "eval_rouge1_for_task039_qasc_overlap_extraction": 29.5, "eval_rouge1_for_task050_multirc_answerability_classification": 81.0, "eval_rouge1_for_task102_commongen_data_to_text": 67.607, "eval_rouge1_for_task1152_bard_word_analogy": 47.0, "eval_rouge1_for_task1153_bard_word_analogy": 39.0, "eval_rouge1_for_task1154_bard_word_analogy": 36.0, "eval_rouge1_for_task1155_bard_word_analogy": 83.0, "eval_rouge1_for_task1156_bard_word_analogy": 58.6667, "eval_rouge1_for_task1157_bard_word_analogy": 52.0, "eval_rouge1_for_task1158_bard_word_analogy": 49.0, "eval_rouge1_for_task1159_bard_word_analogy": 31.6667, "eval_rouge1_for_task1161_coda_19_title_generation": 40.3013, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.0011, "eval_rouge1_for_task121_atomic_question_rewriting": 53.8487, "eval_rouge1_for_task133_winowhy_coreference_resolution": 51.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.1776, "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 43.3993, "eval_rouge1_for_task1356_xlsum_title_generation": 24.1238, "eval_rouge1_for_task1358_xlsum_title_generation": 37.729, "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 54.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 62.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 59.2778, "eval_rouge1_for_task1407_dart_data_to_text": 36.8631, "eval_rouge1_for_task1409_dart_data_to_text": 53.8784, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9416, "eval_rouge1_for_task1439_doqa_answerability_classification": 45.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 45.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 70.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 52.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 38.9449, "eval_rouge1_for_task1554_scitail_textual_entailment": 84.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.0039, "eval_rouge1_for_task1562_zest_question_rewriting": 54.206, "eval_rouge1_for_task1586_scifact_title_generation": 41.5341, "eval_rouge1_for_task1598_nyc_data_to_text": 53.0106, "eval_rouge1_for_task1612_sick_textual_entailment": 45.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.3333, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.2326, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 59.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 92.181, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 55.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.0421, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.1333, "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.2034, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 44.0, "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, "eval_rouge1_for_task201_multinli_textual_entailment": 16.0, "eval_rouge1_for_task202_multinli_textual_entailment": 3.0, "eval_rouge1_for_task219_rocstories_title_generation": 18.7444, "eval_rouge1_for_task220_rocstories_title_generation": 82.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, "eval_rouge1_for_task232_iirc_answerability_classification": 53.0, "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 93.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.9, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 42.909, "eval_rouge1_for_task288_gigaword_title_generation": 32.624, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 10.0, "eval_rouge1_for_task329_gap_coreference_resolution": 48.0, "eval_rouge1_for_task330_gap_coreference_resolution": 74.319, "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 81.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.2426, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 81.5048, "eval_rouge1_for_task418_persent_title_generation": 28.5138, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.9483, "eval_rouge1_for_task500_scruples_title_generation": 15.1267, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.9421, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.9936, "eval_rouge1_for_task602_wikitext_title_generation": 16.8069, "eval_rouge1_for_task613_liar_keyword_tagging": 35.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.0329, "eval_rouge1_for_task619_ohsumed_title_generation": 46.5591, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.1528, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 90.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 36.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 44.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.2952, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 65.7667, "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.0609, "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.2143, "eval_rouge1_for_task677_ollie_data_to_text": 35.987, "eval_rouge1_for_task738_perspectrum_textual_entailment": 45.0, "eval_rouge1_for_task743_eurlex_title_generation": 36.1726, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.8349, "eval_rouge1_for_task769_qed_title_generation": 81.1504, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, "eval_rouge1_for_task891_gap_coreference_resolution": 68.0333, "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 55.2113, "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, "eval_rouge1_for_textual_entailment": 45.3889, "eval_rouge1_for_title_generation": 37.5447, "eval_rouge1_for_word_analogy": 49.5417, "eval_rougeL": 51.0833, "eval_rougeL_for_answerability_classification": 60.8718, "eval_rougeL_for_cause_effect_classification": 58.5927, "eval_rougeL_for_coreference_resolution": 51.7966, "eval_rougeL_for_data_to_text": 47.4526, "eval_rougeL_for_dialogue_act_recognition": 56.8968, "eval_rougeL_for_grammar_error_correction": 60.4593, "eval_rougeL_for_keyword_tagging": 64.3441, "eval_rougeL_for_overlap_extraction": 35.7141, "eval_rougeL_for_question_rewriting": 68.3663, "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 53.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 87.9795, "eval_rougeL_for_task035_winogrande_question_rewriting": 82.3602, "eval_rougeL_for_task036_qasc_keyword_tagging": 62.7597, "eval_rougeL_for_task039_qasc_overlap_extraction": 29.5, "eval_rougeL_for_task050_multirc_answerability_classification": 81.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.4169, "eval_rougeL_for_task1152_bard_word_analogy": 47.0, "eval_rougeL_for_task1153_bard_word_analogy": 39.0, "eval_rougeL_for_task1154_bard_word_analogy": 36.0, "eval_rougeL_for_task1155_bard_word_analogy": 83.0, "eval_rougeL_for_task1156_bard_word_analogy": 58.6667, "eval_rougeL_for_task1157_bard_word_analogy": 52.0, "eval_rougeL_for_task1158_bard_word_analogy": 49.0, "eval_rougeL_for_task1159_bard_word_analogy": 31.6667, "eval_rougeL_for_task1161_coda_19_title_generation": 33.8207, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.9807, "eval_rougeL_for_task121_atomic_question_rewriting": 48.8981, "eval_rougeL_for_task133_winowhy_coreference_resolution": 51.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.6959, "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.5446, "eval_rougeL_for_task1356_xlsum_title_generation": 20.7876, "eval_rougeL_for_task1358_xlsum_title_generation": 31.8999, "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 54.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 62.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 59.2778, "eval_rougeL_for_task1407_dart_data_to_text": 31.8529, "eval_rougeL_for_task1409_dart_data_to_text": 45.3444, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9232, "eval_rougeL_for_task1439_doqa_answerability_classification": 45.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 45.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 70.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 52.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 35.8564, "eval_rougeL_for_task1554_scitail_textual_entailment": 84.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 84.9953, "eval_rougeL_for_task1562_zest_question_rewriting": 47.1633, "eval_rougeL_for_task1586_scifact_title_generation": 34.3425, "eval_rougeL_for_task1598_nyc_data_to_text": 41.3348, "eval_rougeL_for_task1612_sick_textual_entailment": 45.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.3333, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.9357, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 59.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 89.8661, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 55.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.0119, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.1333, "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.1078, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 44.0, "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, "eval_rougeL_for_task201_multinli_textual_entailment": 16.0, "eval_rougeL_for_task202_multinli_textual_entailment": 3.0, "eval_rougeL_for_task219_rocstories_title_generation": 18.2444, "eval_rougeL_for_task220_rocstories_title_generation": 82.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, "eval_rougeL_for_task232_iirc_answerability_classification": 53.0, "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 93.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.9, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.9283, "eval_rougeL_for_task288_gigaword_title_generation": 28.6853, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 10.0, "eval_rougeL_for_task329_gap_coreference_resolution": 48.0, "eval_rougeL_for_task330_gap_coreference_resolution": 74.319, "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 81.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.6679, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.5403, "eval_rougeL_for_task418_persent_title_generation": 24.59, "eval_rougeL_for_task442_com_qa_question_rewriting": 69.7218, "eval_rougeL_for_task500_scruples_title_generation": 13.7654, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.6434, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.3828, "eval_rougeL_for_task602_wikitext_title_generation": 16.7248, "eval_rougeL_for_task613_liar_keyword_tagging": 35.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 40.8145, "eval_rougeL_for_task619_ohsumed_title_generation": 39.6895, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.1656, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 90.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 36.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 44.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.2952, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 65.7667, "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.5584, "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.3462, "eval_rougeL_for_task677_ollie_data_to_text": 28.6822, "eval_rougeL_for_task738_perspectrum_textual_entailment": 45.0, "eval_rougeL_for_task743_eurlex_title_generation": 31.7671, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.236, "eval_rougeL_for_task769_qed_title_generation": 81.1504, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 61.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, "eval_rougeL_for_task891_gap_coreference_resolution": 68.0333, "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 42.5116, "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, "eval_rougeL_for_textual_entailment": 45.3889, "eval_rougeL_for_title_generation": 34.4945, "eval_rougeL_for_word_analogy": 49.5417, "eval_runtime": 863.6114, "eval_samples_per_second": 13.791, "eval_steps_per_second": 0.863, "step": 3000 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 0.9545, "step": 3500 }, { "epoch": 0.76, "eval_exact_match": 33.199, "eval_exact_match_for_answerability_classification": 59.3846, "eval_exact_match_for_cause_effect_classification": 52.5714, "eval_exact_match_for_coreference_resolution": 44.0714, "eval_exact_match_for_data_to_text": 5.3269, "eval_exact_match_for_dialogue_act_recognition": 52.4286, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 44.2, "eval_exact_match_for_overlap_extraction": 14.0, "eval_exact_match_for_question_rewriting": 2.5455, "eval_exact_match_for_task020_mctaco_answerability_classification": 57.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 60.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 9.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 14.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 30.0, "eval_exact_match_for_task1153_bard_word_analogy": 37.0, "eval_exact_match_for_task1154_bard_word_analogy": 27.0, "eval_exact_match_for_task1155_bard_word_analogy": 89.0, "eval_exact_match_for_task1156_bard_word_analogy": 60.0, "eval_exact_match_for_task1157_bard_word_analogy": 61.0, "eval_exact_match_for_task1158_bard_word_analogy": 48.0, "eval_exact_match_for_task1159_bard_word_analogy": 39.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, "eval_exact_match_for_task121_atomic_question_rewriting": 1.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 55.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 53.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, "eval_exact_match_for_task1386_anli_textual_entailment": 2.0, "eval_exact_match_for_task1387_anli_textual_entailment": 3.0, "eval_exact_match_for_task1388_cb_textual_entailment": 1.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 64.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 16.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 4.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 48.0, "eval_exact_match_for_task1615_sick_textual_entailment": 52.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 38.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_exact_match_for_task1659_billsum_title_generation": 2.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 38.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 9.0, "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, "eval_exact_match_for_task201_multinli_textual_entailment": 18.0, "eval_exact_match_for_task202_multinli_textual_entailment": 8.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 97.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_exact_match_for_task232_iirc_answerability_classification": 40.0, "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 87.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 57.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 70.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 70.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 70.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 20.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 68.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 88.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 51.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 12.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 30.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 73.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 85.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 59.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 38.0, "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, "eval_exact_match_for_task893_gap_coreference_resolution": 46.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 76.0, "eval_exact_match_for_textual_entailment": 37.7083, "eval_exact_match_for_title_generation": 11.2108, "eval_exact_match_for_word_analogy": 48.875, "eval_f1": 50.8535, "eval_f1_for_answerability_classification": 61.9487, "eval_f1_for_cause_effect_classification": 68.797, "eval_f1_for_coreference_resolution": 50.5939, "eval_f1_for_data_to_text": 52.4221, "eval_f1_for_dialogue_act_recognition": 54.5714, "eval_f1_for_grammar_error_correction": 58.5435, "eval_f1_for_keyword_tagging": 60.308, "eval_f1_for_overlap_extraction": 37.631, "eval_f1_for_question_rewriting": 70.7494, "eval_f1_for_task020_mctaco_answerability_classification": 57.0, "eval_f1_for_task033_winogrande_coreference_resolution": 63.1667, "eval_f1_for_task034_winogrande_question_rewriting": 92.1359, "eval_f1_for_task035_winogrande_question_rewriting": 86.9272, "eval_f1_for_task036_qasc_keyword_tagging": 59.2879, "eval_f1_for_task039_qasc_overlap_extraction": 34.2333, "eval_f1_for_task050_multirc_answerability_classification": 69.0, "eval_f1_for_task102_commongen_data_to_text": 54.1283, "eval_f1_for_task1152_bard_word_analogy": 30.0, "eval_f1_for_task1153_bard_word_analogy": 37.0, "eval_f1_for_task1154_bard_word_analogy": 27.0, "eval_f1_for_task1155_bard_word_analogy": 89.0, "eval_f1_for_task1156_bard_word_analogy": 60.6667, "eval_f1_for_task1157_bard_word_analogy": 61.0, "eval_f1_for_task1158_bard_word_analogy": 48.0, "eval_f1_for_task1159_bard_word_analogy": 39.0, "eval_f1_for_task1161_coda_19_title_generation": 38.782, "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.6555, "eval_f1_for_task121_atomic_question_rewriting": 51.2801, "eval_f1_for_task133_winowhy_coreference_resolution": 55.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.7019, "eval_f1_for_task1344_rte_textual_entailment": 53.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.7866, "eval_f1_for_task1356_xlsum_title_generation": 24.2203, "eval_f1_for_task1358_xlsum_title_generation": 33.9477, "eval_f1_for_task1385_anli_textual_entailment": 1.0, "eval_f1_for_task1386_anli_textual_entailment": 2.0, "eval_f1_for_task1387_anli_textual_entailment": 3.0, "eval_f1_for_task1388_cb_textual_entailment": 1.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 64.0, "eval_f1_for_task1407_dart_data_to_text": 31.5423, "eval_f1_for_task1409_dart_data_to_text": 54.0343, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.2202, "eval_f1_for_task1439_doqa_answerability_classification": 44.0, "eval_f1_for_task1442_doqa_answerability_classification": 58.0, "eval_f1_for_task1516_imppres_textual_entailment": 16.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 39.3424, "eval_f1_for_task1554_scitail_textual_entailment": 58.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8669, "eval_f1_for_task1562_zest_question_rewriting": 49.3597, "eval_f1_for_task1586_scifact_title_generation": 39.7064, "eval_f1_for_task1598_nyc_data_to_text": 53.2245, "eval_f1_for_task1612_sick_textual_entailment": 48.0, "eval_f1_for_task1615_sick_textual_entailment": 52.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.9162, "eval_f1_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_f1_for_task1631_open_pi_data_to_text": 93.821, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_f1_for_task1659_billsum_title_generation": 38.0904, "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.1619, "eval_f1_for_task1728_web_nlg_data_to_text": 60.7004, "eval_f1_for_task190_snli_textual_entailment": 9.0, "eval_f1_for_task199_multinli_textual_entailment": 45.0, "eval_f1_for_task200_multinli_textual_entailment": 84.0, "eval_f1_for_task201_multinli_textual_entailment": 18.0, "eval_f1_for_task202_multinli_textual_entailment": 8.0, "eval_f1_for_task219_rocstories_title_generation": 17.9976, "eval_f1_for_task220_rocstories_title_generation": 97.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_f1_for_task232_iirc_answerability_classification": 40.0, "eval_f1_for_task233_iirc_answerability_classification": 49.0, "eval_f1_for_task242_tweetqa_answerability_classification": 87.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 69.7333, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.0287, "eval_f1_for_task288_gigaword_title_generation": 28.8824, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 27.4667, "eval_f1_for_task329_gap_coreference_resolution": 55.0, "eval_f1_for_task330_gap_coreference_resolution": 72.2524, "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 90.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 90.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.2498, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 24.6667, "eval_f1_for_task402_grailqa_question_rewriting": 77.3535, "eval_f1_for_task418_persent_title_generation": 29.0853, "eval_f1_for_task442_com_qa_question_rewriting": 71.2798, "eval_f1_for_task500_scruples_title_generation": 19.2135, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9698, "eval_f1_for_task520_aquamuse_answerability_classification": 68.0, "eval_f1_for_task569_recipe_nlg_title_generation": 39.3926, "eval_f1_for_task602_wikitext_title_generation": 14.4416, "eval_f1_for_task613_liar_keyword_tagging": 22.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 44.3295, "eval_f1_for_task619_ohsumed_title_generation": 47.0557, "eval_f1_for_task620_ohsumed_keyword_tagging": 39.3, "eval_f1_for_task623_ohsumed_keyword_tagging": 88.0, "eval_f1_for_task640_e_snli_textual_entailment": 37.0, "eval_f1_for_task641_e_snli_textual_entailment": 51.0, "eval_f1_for_task642_e_snli_textual_entailment": 42.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.2857, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.1667, "eval_f1_for_task670_ambigqa_question_rewriting": 80.5162, "eval_f1_for_task671_ambigqa_question_rewriting": 69.0326, "eval_f1_for_task677_ollie_data_to_text": 32.9654, "eval_f1_for_task738_perspectrum_textual_entailment": 30.0, "eval_f1_for_task743_eurlex_title_generation": 37.339, "eval_f1_for_task760_msr_sqa_data_to_text": 3.5685, "eval_f1_for_task769_qed_title_generation": 80.7579, "eval_f1_for_task827_copa_cause_effect_classification": 85.0, "eval_f1_for_task828_copa_cause_effect_classification": 59.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, "eval_f1_for_task890_gwsd_textual_entailment": 38.0, "eval_f1_for_task891_gap_coreference_resolution": 57.7, "eval_f1_for_task892_gap_coreference_resolution": 50.0, "eval_f1_for_task893_gap_coreference_resolution": 46.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 51.6623, "eval_f1_for_task970_sherliic_textual_entailment": 76.0, "eval_f1_for_textual_entailment": 37.7083, "eval_f1_for_title_generation": 37.9829, "eval_f1_for_word_analogy": 48.9583, "eval_gen_len": 8.9422, "eval_global_step": 3500, "eval_loss": 1.1319254636764526, "eval_rouge1": 53.9336, "eval_rouge1_for_answerability_classification": 61.9487, "eval_rouge1_for_cause_effect_classification": 69.369, "eval_rouge1_for_coreference_resolution": 50.9677, "eval_rouge1_for_data_to_text": 55.3876, "eval_rouge1_for_dialogue_act_recognition": 58.5921, "eval_rouge1_for_grammar_error_correction": 62.8705, "eval_rouge1_for_keyword_tagging": 64.4549, "eval_rouge1_for_overlap_extraction": 41.051, "eval_rouge1_for_question_rewriting": 72.2047, "eval_rouge1_for_task020_mctaco_answerability_classification": 57.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 63.0667, "eval_rouge1_for_task034_winogrande_question_rewriting": 92.1805, "eval_rouge1_for_task035_winogrande_question_rewriting": 87.7258, "eval_rouge1_for_task036_qasc_keyword_tagging": 62.522, "eval_rouge1_for_task039_qasc_overlap_extraction": 40.2333, "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, "eval_rouge1_for_task102_commongen_data_to_text": 67.714, "eval_rouge1_for_task1152_bard_word_analogy": 30.0, "eval_rouge1_for_task1153_bard_word_analogy": 38.0, "eval_rouge1_for_task1154_bard_word_analogy": 27.0, "eval_rouge1_for_task1155_bard_word_analogy": 89.0, "eval_rouge1_for_task1156_bard_word_analogy": 60.6667, "eval_rouge1_for_task1157_bard_word_analogy": 61.0, "eval_rouge1_for_task1158_bard_word_analogy": 48.0, "eval_rouge1_for_task1159_bard_word_analogy": 39.0, "eval_rouge1_for_task1161_coda_19_title_generation": 42.7369, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.8601, "eval_rouge1_for_task121_atomic_question_rewriting": 53.4059, "eval_rouge1_for_task133_winowhy_coreference_resolution": 55.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.4542, "eval_rouge1_for_task1344_rte_textual_entailment": 53.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.957, "eval_rouge1_for_task1356_xlsum_title_generation": 28.0982, "eval_rouge1_for_task1358_xlsum_title_generation": 37.9751, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.4778, "eval_rouge1_for_task1407_dart_data_to_text": 31.9165, "eval_rouge1_for_task1409_dart_data_to_text": 55.0963, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.8362, "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 16.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 42.9942, "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9048, "eval_rouge1_for_task1562_zest_question_rewriting": 52.0946, "eval_rouge1_for_task1586_scifact_title_generation": 43.9918, "eval_rouge1_for_task1598_nyc_data_to_text": 54.7826, "eval_rouge1_for_task1612_sick_textual_entailment": 48.0, "eval_rouge1_for_task1615_sick_textual_entailment": 84.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.1182, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 93.9548, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.9793, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.1619, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.7653, "eval_rouge1_for_task190_snli_textual_entailment": 9.0, "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, "eval_rouge1_for_task201_multinli_textual_entailment": 18.0, "eval_rouge1_for_task202_multinli_textual_entailment": 8.0, "eval_rouge1_for_task219_rocstories_title_generation": 22.3715, "eval_rouge1_for_task220_rocstories_title_generation": 97.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_rouge1_for_task232_iirc_answerability_classification": 40.0, "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 87.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 69.9, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.8686, "eval_rouge1_for_task288_gigaword_title_generation": 31.9976, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 27.8667, "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, "eval_rouge1_for_task330_gap_coreference_resolution": 72.3524, "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 90.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 90.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.3474, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.3333, "eval_rouge1_for_task402_grailqa_question_rewriting": 79.2528, "eval_rouge1_for_task418_persent_title_generation": 33.4641, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6792, "eval_rouge1_for_task500_scruples_title_generation": 21.3252, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.3767, "eval_rouge1_for_task520_aquamuse_answerability_classification": 68.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.8678, "eval_rouge1_for_task602_wikitext_title_generation": 15.5308, "eval_rouge1_for_task613_liar_keyword_tagging": 35.1667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 48.2358, "eval_rouge1_for_task619_ohsumed_title_generation": 50.0317, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.8, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 88.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 51.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.7857, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.1667, "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.3018, "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.676, "eval_rouge1_for_task677_ollie_data_to_text": 35.5022, "eval_rouge1_for_task738_perspectrum_textual_entailment": 71.0, "eval_rouge1_for_task743_eurlex_title_generation": 39.2896, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.8634, "eval_rouge1_for_task769_qed_title_generation": 80.3432, "eval_rouge1_for_task827_copa_cause_effect_classification": 85.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 59.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 38.0, "eval_rouge1_for_task891_gap_coreference_resolution": 57.7, "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, "eval_rouge1_for_task893_gap_coreference_resolution": 46.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 54.7657, "eval_rouge1_for_task970_sherliic_textual_entailment": 76.0, "eval_rouge1_for_textual_entailment": 46.375, "eval_rouge1_for_title_generation": 40.434, "eval_rouge1_for_word_analogy": 49.0833, "eval_rougeL": 52.4266, "eval_rougeL_for_answerability_classification": 61.9487, "eval_rougeL_for_cause_effect_classification": 68.6387, "eval_rougeL_for_coreference_resolution": 50.9677, "eval_rougeL_for_data_to_text": 46.6477, "eval_rougeL_for_dialogue_act_recognition": 58.5921, "eval_rougeL_for_grammar_error_correction": 62.0576, "eval_rougeL_for_keyword_tagging": 63.8613, "eval_rougeL_for_overlap_extraction": 40.3261, "eval_rougeL_for_question_rewriting": 68.7983, "eval_rougeL_for_task020_mctaco_answerability_classification": 57.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 63.0667, "eval_rougeL_for_task034_winogrande_question_rewriting": 92.1805, "eval_rougeL_for_task035_winogrande_question_rewriting": 86.8728, "eval_rougeL_for_task036_qasc_keyword_tagging": 61.0375, "eval_rougeL_for_task039_qasc_overlap_extraction": 40.2333, "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, "eval_rougeL_for_task102_commongen_data_to_text": 55.0017, "eval_rougeL_for_task1152_bard_word_analogy": 30.0, "eval_rougeL_for_task1153_bard_word_analogy": 38.0, "eval_rougeL_for_task1154_bard_word_analogy": 27.0, "eval_rougeL_for_task1155_bard_word_analogy": 89.0, "eval_rougeL_for_task1156_bard_word_analogy": 60.6667, "eval_rougeL_for_task1157_bard_word_analogy": 61.0, "eval_rougeL_for_task1158_bard_word_analogy": 48.0, "eval_rougeL_for_task1159_bard_word_analogy": 39.0, "eval_rougeL_for_task1161_coda_19_title_generation": 35.8263, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.9008, "eval_rougeL_for_task121_atomic_question_rewriting": 48.9177, "eval_rougeL_for_task133_winowhy_coreference_resolution": 55.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.7192, "eval_rougeL_for_task1344_rte_textual_entailment": 53.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.9751, "eval_rougeL_for_task1356_xlsum_title_generation": 23.2959, "eval_rougeL_for_task1358_xlsum_title_generation": 31.7653, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.4778, "eval_rougeL_for_task1407_dart_data_to_text": 27.6193, "eval_rougeL_for_task1409_dart_data_to_text": 44.0496, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.0753, "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 16.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 40.2642, "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0399, "eval_rougeL_for_task1562_zest_question_rewriting": 46.0866, "eval_rougeL_for_task1586_scifact_title_generation": 36.2407, "eval_rougeL_for_task1598_nyc_data_to_text": 42.4799, "eval_rougeL_for_task1612_sick_textual_entailment": 48.0, "eval_rougeL_for_task1615_sick_textual_entailment": 84.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.874, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 88.9029, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.409, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.1619, "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.3044, "eval_rougeL_for_task190_snli_textual_entailment": 9.0, "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, "eval_rougeL_for_task201_multinli_textual_entailment": 18.0, "eval_rougeL_for_task202_multinli_textual_entailment": 8.0, "eval_rougeL_for_task219_rocstories_title_generation": 22.0382, "eval_rougeL_for_task220_rocstories_title_generation": 97.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_rougeL_for_task232_iirc_answerability_classification": 40.0, "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 87.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 69.9, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 40.4189, "eval_rougeL_for_task288_gigaword_title_generation": 27.7844, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 27.8667, "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, "eval_rougeL_for_task330_gap_coreference_resolution": 72.3524, "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 85.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 90.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 90.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.5797, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.3333, "eval_rougeL_for_task402_grailqa_question_rewriting": 64.1936, "eval_rougeL_for_task418_persent_title_generation": 29.5632, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.5345, "eval_rougeL_for_task500_scruples_title_generation": 19.7278, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.9656, "eval_rougeL_for_task520_aquamuse_answerability_classification": 68.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.8289, "eval_rougeL_for_task602_wikitext_title_generation": 15.5308, "eval_rougeL_for_task613_liar_keyword_tagging": 35.1667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.8913, "eval_rougeL_for_task619_ohsumed_title_generation": 43.7677, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.3167, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 88.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 51.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.7857, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 13.1667, "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.4752, "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.7701, "eval_rougeL_for_task677_ollie_data_to_text": 30.0575, "eval_rougeL_for_task738_perspectrum_textual_entailment": 71.0, "eval_rougeL_for_task743_eurlex_title_generation": 34.0891, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.5467, "eval_rougeL_for_task769_qed_title_generation": 80.3432, "eval_rougeL_for_task827_copa_cause_effect_classification": 85.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 59.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 38.0, "eval_rougeL_for_task891_gap_coreference_resolution": 57.7, "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, "eval_rougeL_for_task893_gap_coreference_resolution": 46.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 41.9724, "eval_rougeL_for_task970_sherliic_textual_entailment": 76.0, "eval_rougeL_for_textual_entailment": 46.375, "eval_rougeL_for_title_generation": 37.1454, "eval_rougeL_for_word_analogy": 49.0833, "eval_runtime": 842.2829, "eval_samples_per_second": 14.14, "eval_steps_per_second": 0.885, "step": 3500 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 0.9275, "step": 4000 }, { "epoch": 0.87, "eval_exact_match": 33.0982, "eval_exact_match_for_answerability_classification": 52.6154, "eval_exact_match_for_cause_effect_classification": 44.7143, "eval_exact_match_for_coreference_resolution": 43.2857, "eval_exact_match_for_data_to_text": 6.6586, "eval_exact_match_for_dialogue_act_recognition": 53.4286, "eval_exact_match_for_grammar_error_correction": 7.0, "eval_exact_match_for_keyword_tagging": 48.8, "eval_exact_match_for_overlap_extraction": 11.5, "eval_exact_match_for_question_rewriting": 2.0909, "eval_exact_match_for_task020_mctaco_answerability_classification": 59.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 62.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 8.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 37.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, "eval_exact_match_for_task050_multirc_answerability_classification": 73.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 44.0, "eval_exact_match_for_task1153_bard_word_analogy": 40.0, "eval_exact_match_for_task1154_bard_word_analogy": 29.0, "eval_exact_match_for_task1155_bard_word_analogy": 79.0, "eval_exact_match_for_task1156_bard_word_analogy": 66.0, "eval_exact_match_for_task1157_bard_word_analogy": 62.0, "eval_exact_match_for_task1158_bard_word_analogy": 50.0, "eval_exact_match_for_task1159_bard_word_analogy": 42.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 61.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 75.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 4.0, "eval_exact_match_for_task1386_anli_textual_entailment": 9.0, "eval_exact_match_for_task1387_anli_textual_entailment": 11.0, "eval_exact_match_for_task1388_cb_textual_entailment": 3.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 56.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 70.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 14.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 76.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, "eval_exact_match_for_task1615_sick_textual_entailment": 58.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 63.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 47.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 31.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 36.0, "eval_exact_match_for_task199_multinli_textual_entailment": 41.0, "eval_exact_match_for_task200_multinli_textual_entailment": 80.0, "eval_exact_match_for_task201_multinli_textual_entailment": 19.0, "eval_exact_match_for_task202_multinli_textual_entailment": 15.0, "eval_exact_match_for_task219_rocstories_title_generation": 0.0, "eval_exact_match_for_task220_rocstories_title_generation": 95.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 90.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 47.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, "eval_exact_match_for_task329_gap_coreference_resolution": 52.0, "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 53.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 59.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 26.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 71.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 89.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 47.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 82.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 18.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 32.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 74.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 67.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 64.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 76.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 55.0, "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, "eval_exact_match_for_task893_gap_coreference_resolution": 42.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 65.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 60.0, "eval_exact_match_for_textual_entailment": 41.75, "eval_exact_match_for_title_generation": 10.7623, "eval_exact_match_for_word_analogy": 51.5, "eval_f1": 50.6353, "eval_f1_for_answerability_classification": 55.1795, "eval_f1_for_cause_effect_classification": 63.9381, "eval_f1_for_coreference_resolution": 49.948, "eval_f1_for_data_to_text": 54.3517, "eval_f1_for_dialogue_act_recognition": 57.0, "eval_f1_for_grammar_error_correction": 59.2994, "eval_f1_for_keyword_tagging": 60.8444, "eval_f1_for_overlap_extraction": 26.9657, "eval_f1_for_question_rewriting": 70.0259, "eval_f1_for_task020_mctaco_answerability_classification": 59.0, "eval_f1_for_task033_winogrande_coreference_resolution": 64.6667, "eval_f1_for_task034_winogrande_question_rewriting": 90.9762, "eval_f1_for_task035_winogrande_question_rewriting": 88.0962, "eval_f1_for_task036_qasc_keyword_tagging": 67.3984, "eval_f1_for_task039_qasc_overlap_extraction": 33.9857, "eval_f1_for_task050_multirc_answerability_classification": 73.0, "eval_f1_for_task102_commongen_data_to_text": 53.0544, "eval_f1_for_task1152_bard_word_analogy": 44.0, "eval_f1_for_task1153_bard_word_analogy": 40.0, "eval_f1_for_task1154_bard_word_analogy": 29.0, "eval_f1_for_task1155_bard_word_analogy": 79.0, "eval_f1_for_task1156_bard_word_analogy": 66.6667, "eval_f1_for_task1157_bard_word_analogy": 62.0, "eval_f1_for_task1158_bard_word_analogy": 50.0, "eval_f1_for_task1159_bard_word_analogy": 42.0, "eval_f1_for_task1161_coda_19_title_generation": 36.95, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.2364, "eval_f1_for_task121_atomic_question_rewriting": 48.7845, "eval_f1_for_task133_winowhy_coreference_resolution": 61.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.4813, "eval_f1_for_task1344_rte_textual_entailment": 75.0, "eval_f1_for_task1345_qqp_question_rewriting": 42.1618, "eval_f1_for_task1356_xlsum_title_generation": 22.7138, "eval_f1_for_task1358_xlsum_title_generation": 33.7612, "eval_f1_for_task1385_anli_textual_entailment": 4.0, "eval_f1_for_task1386_anli_textual_entailment": 9.0, "eval_f1_for_task1387_anli_textual_entailment": 11.0, "eval_f1_for_task1388_cb_textual_entailment": 3.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 56.0, "eval_f1_for_task1393_copa_cause_effect_classification": 70.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_f1_for_task1407_dart_data_to_text": 35.1167, "eval_f1_for_task1409_dart_data_to_text": 55.9499, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.8326, "eval_f1_for_task1439_doqa_answerability_classification": 47.0, "eval_f1_for_task1442_doqa_answerability_classification": 57.0, "eval_f1_for_task1516_imppres_textual_entailment": 14.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1540_peer_read_title_generation": 34.6049, "eval_f1_for_task1554_scitail_textual_entailment": 76.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7663, "eval_f1_for_task1562_zest_question_rewriting": 50.8055, "eval_f1_for_task1586_scifact_title_generation": 38.1744, "eval_f1_for_task1598_nyc_data_to_text": 53.0503, "eval_f1_for_task1612_sick_textual_entailment": 39.0, "eval_f1_for_task1615_sick_textual_entailment": 58.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.1522, "eval_f1_for_task1624_disfl_qa_answerability_classification": 63.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.6846, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_f1_for_task1659_billsum_title_generation": 35.5767, "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.3286, "eval_f1_for_task1728_web_nlg_data_to_text": 65.0968, "eval_f1_for_task190_snli_textual_entailment": 36.0, "eval_f1_for_task199_multinli_textual_entailment": 41.0, "eval_f1_for_task200_multinli_textual_entailment": 80.0, "eval_f1_for_task201_multinli_textual_entailment": 19.0, "eval_f1_for_task202_multinli_textual_entailment": 15.0, "eval_f1_for_task219_rocstories_title_generation": 15.9782, "eval_f1_for_task220_rocstories_title_generation": 95.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_f1_for_task232_iirc_answerability_classification": 0.0, "eval_f1_for_task233_iirc_answerability_classification": 0.0, "eval_f1_for_task242_tweetqa_answerability_classification": 90.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.7048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 19.9457, "eval_f1_for_task288_gigaword_title_generation": 29.2614, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 7.5333, "eval_f1_for_task329_gap_coreference_resolution": 52.0, "eval_f1_for_task330_gap_coreference_resolution": 67.5524, "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 84.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 86.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 32.075, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.8333, "eval_f1_for_task402_grailqa_question_rewriting": 80.1505, "eval_f1_for_task418_persent_title_generation": 27.2894, "eval_f1_for_task442_com_qa_question_rewriting": 69.6365, "eval_f1_for_task500_scruples_title_generation": 17.4133, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.827, "eval_f1_for_task520_aquamuse_answerability_classification": 71.0, "eval_f1_for_task569_recipe_nlg_title_generation": 37.7793, "eval_f1_for_task602_wikitext_title_generation": 12.3121, "eval_f1_for_task613_liar_keyword_tagging": 21.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 43.8252, "eval_f1_for_task619_ohsumed_title_generation": 45.0399, "eval_f1_for_task620_ohsumed_keyword_tagging": 36.2333, "eval_f1_for_task623_ohsumed_keyword_tagging": 89.0, "eval_f1_for_task640_e_snli_textual_entailment": 33.0, "eval_f1_for_task641_e_snli_textual_entailment": 47.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 89.9238, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 33.8333, "eval_f1_for_task670_ambigqa_question_rewriting": 81.3731, "eval_f1_for_task671_ambigqa_question_rewriting": 59.9119, "eval_f1_for_task677_ollie_data_to_text": 33.6557, "eval_f1_for_task738_perspectrum_textual_entailment": 32.0, "eval_f1_for_task743_eurlex_title_generation": 40.4007, "eval_f1_for_task760_msr_sqa_data_to_text": 7.1776, "eval_f1_for_task769_qed_title_generation": 81.2537, "eval_f1_for_task827_copa_cause_effect_classification": 67.0, "eval_f1_for_task828_copa_cause_effect_classification": 64.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 76.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_f1_for_task890_gwsd_textual_entailment": 55.0, "eval_f1_for_task891_gap_coreference_resolution": 57.819, "eval_f1_for_task892_gap_coreference_resolution": 48.0, "eval_f1_for_task893_gap_coreference_resolution": 42.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 65.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_f1_for_task957_e2e_data_to_text": 55.4707, "eval_f1_for_task970_sherliic_textual_entailment": 60.0, "eval_f1_for_textual_entailment": 41.75, "eval_f1_for_title_generation": 36.8188, "eval_f1_for_word_analogy": 51.5833, "eval_gen_len": 9.5761, "eval_global_step": 4000, "eval_loss": 1.147523283958435, "eval_rouge1": 53.6134, "eval_rouge1_for_answerability_classification": 55.1795, "eval_rouge1_for_cause_effect_classification": 64.9617, "eval_rouge1_for_coreference_resolution": 50.5321, "eval_rouge1_for_data_to_text": 57.034, "eval_rouge1_for_dialogue_act_recognition": 59.8741, "eval_rouge1_for_grammar_error_correction": 63.9674, "eval_rouge1_for_keyword_tagging": 66.7156, "eval_rouge1_for_overlap_extraction": 29.9797, "eval_rouge1_for_question_rewriting": 71.4499, "eval_rouge1_for_task020_mctaco_answerability_classification": 59.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 64.6667, "eval_rouge1_for_task034_winogrande_question_rewriting": 91.0327, "eval_rouge1_for_task035_winogrande_question_rewriting": 88.6066, "eval_rouge1_for_task036_qasc_keyword_tagging": 74.9349, "eval_rouge1_for_task039_qasc_overlap_extraction": 39.6524, "eval_rouge1_for_task050_multirc_answerability_classification": 73.0, "eval_rouge1_for_task102_commongen_data_to_text": 66.4211, "eval_rouge1_for_task1152_bard_word_analogy": 44.0, "eval_rouge1_for_task1153_bard_word_analogy": 40.0, "eval_rouge1_for_task1154_bard_word_analogy": 29.0, "eval_rouge1_for_task1155_bard_word_analogy": 79.0, "eval_rouge1_for_task1156_bard_word_analogy": 66.6667, "eval_rouge1_for_task1157_bard_word_analogy": 62.0, "eval_rouge1_for_task1158_bard_word_analogy": 50.0, "eval_rouge1_for_task1159_bard_word_analogy": 42.0, "eval_rouge1_for_task1161_coda_19_title_generation": 40.7341, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.486, "eval_rouge1_for_task121_atomic_question_rewriting": 51.0412, "eval_rouge1_for_task133_winowhy_coreference_resolution": 61.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.9974, "eval_rouge1_for_task1344_rte_textual_entailment": 75.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 45.4146, "eval_rouge1_for_task1356_xlsum_title_generation": 27.5889, "eval_rouge1_for_task1358_xlsum_title_generation": 38.0125, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, "eval_rouge1_for_task1387_anli_textual_entailment": 36.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 56.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 70.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 67.7857, "eval_rouge1_for_task1407_dart_data_to_text": 35.5253, "eval_rouge1_for_task1409_dart_data_to_text": 56.4526, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.9841, "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 14.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1540_peer_read_title_generation": 38.094, "eval_rouge1_for_task1554_scitail_textual_entailment": 76.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9506, "eval_rouge1_for_task1562_zest_question_rewriting": 53.0016, "eval_rouge1_for_task1586_scifact_title_generation": 42.162, "eval_rouge1_for_task1598_nyc_data_to_text": 53.5013, "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, "eval_rouge1_for_task1615_sick_textual_entailment": 86.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.3701, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 63.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.7233, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_rouge1_for_task1659_billsum_title_generation": 37.7722, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.3286, "eval_rouge1_for_task1728_web_nlg_data_to_text": 67.2193, "eval_rouge1_for_task190_snli_textual_entailment": 36.0, "eval_rouge1_for_task199_multinli_textual_entailment": 41.0, "eval_rouge1_for_task200_multinli_textual_entailment": 80.0, "eval_rouge1_for_task201_multinli_textual_entailment": 19.0, "eval_rouge1_for_task202_multinli_textual_entailment": 15.0, "eval_rouge1_for_task219_rocstories_title_generation": 21.0304, "eval_rouge1_for_task220_rocstories_title_generation": 95.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 90.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.55, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 20.307, "eval_rouge1_for_task288_gigaword_title_generation": 32.6332, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.0333, "eval_rouge1_for_task329_gap_coreference_resolution": 52.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.819, "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.5471, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 82.2364, "eval_rouge1_for_task418_persent_title_generation": 31.5464, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.0475, "eval_rouge1_for_task500_scruples_title_generation": 19.0953, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.3769, "eval_rouge1_for_task520_aquamuse_answerability_classification": 71.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.3631, "eval_rouge1_for_task602_wikitext_title_generation": 13.3095, "eval_rouge1_for_task613_liar_keyword_tagging": 35.3333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 50.518, "eval_rouge1_for_task619_ohsumed_title_generation": 48.3967, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.8857, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 89.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 47.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 90.4238, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 33.8333, "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.0678, "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.6448, "eval_rouge1_for_task677_ollie_data_to_text": 36.3454, "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, "eval_rouge1_for_task743_eurlex_title_generation": 42.34, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.8021, "eval_rouge1_for_task769_qed_title_generation": 81.2073, "eval_rouge1_for_task827_copa_cause_effect_classification": 67.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 64.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 76.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 45.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 55.0, "eval_rouge1_for_task891_gap_coreference_resolution": 58.0524, "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, "eval_rouge1_for_task893_gap_coreference_resolution": 42.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rouge1_for_task957_e2e_data_to_text": 57.8841, "eval_rouge1_for_task970_sherliic_textual_entailment": 60.0, "eval_rouge1_for_textual_entailment": 49.7083, "eval_rouge1_for_title_generation": 39.3795, "eval_rouge1_for_word_analogy": 51.5833, "eval_rougeL": 52.0816, "eval_rougeL_for_answerability_classification": 55.1795, "eval_rougeL_for_cause_effect_classification": 64.0946, "eval_rougeL_for_coreference_resolution": 50.5321, "eval_rougeL_for_data_to_text": 48.6853, "eval_rougeL_for_dialogue_act_recognition": 59.8741, "eval_rougeL_for_grammar_error_correction": 63.1709, "eval_rougeL_for_keyword_tagging": 66.4302, "eval_rougeL_for_overlap_extraction": 29.6339, "eval_rougeL_for_question_rewriting": 67.5771, "eval_rougeL_for_task020_mctaco_answerability_classification": 59.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 64.6667, "eval_rougeL_for_task034_winogrande_question_rewriting": 91.0327, "eval_rougeL_for_task035_winogrande_question_rewriting": 87.988, "eval_rougeL_for_task036_qasc_keyword_tagging": 74.7413, "eval_rougeL_for_task039_qasc_overlap_extraction": 39.6524, "eval_rougeL_for_task050_multirc_answerability_classification": 73.0, "eval_rougeL_for_task102_commongen_data_to_text": 54.9577, "eval_rougeL_for_task1152_bard_word_analogy": 44.0, "eval_rougeL_for_task1153_bard_word_analogy": 40.0, "eval_rougeL_for_task1154_bard_word_analogy": 29.0, "eval_rougeL_for_task1155_bard_word_analogy": 79.0, "eval_rougeL_for_task1156_bard_word_analogy": 66.6667, "eval_rougeL_for_task1157_bard_word_analogy": 62.0, "eval_rougeL_for_task1158_bard_word_analogy": 50.0, "eval_rougeL_for_task1159_bard_word_analogy": 42.0, "eval_rougeL_for_task1161_coda_19_title_generation": 33.6071, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8697, "eval_rougeL_for_task121_atomic_question_rewriting": 44.6281, "eval_rougeL_for_task133_winowhy_coreference_resolution": 61.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.3567, "eval_rougeL_for_task1344_rte_textual_entailment": 75.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 42.547, "eval_rougeL_for_task1356_xlsum_title_generation": 22.95, "eval_rougeL_for_task1358_xlsum_title_generation": 31.4113, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, "eval_rougeL_for_task1387_anli_textual_entailment": 36.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 56.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 70.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 67.7857, "eval_rougeL_for_task1407_dart_data_to_text": 31.5024, "eval_rougeL_for_task1409_dart_data_to_text": 46.3977, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.3074, "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 14.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 63.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1540_peer_read_title_generation": 34.4935, "eval_rougeL_for_task1554_scitail_textual_entailment": 76.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0345, "eval_rougeL_for_task1562_zest_question_rewriting": 46.8165, "eval_rougeL_for_task1586_scifact_title_generation": 34.3232, "eval_rougeL_for_task1598_nyc_data_to_text": 41.2663, "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, "eval_rougeL_for_task1615_sick_textual_entailment": 86.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.0733, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 63.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 93.9084, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_rougeL_for_task1659_billsum_title_generation": 32.1891, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.3286, "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.8678, "eval_rougeL_for_task190_snli_textual_entailment": 36.0, "eval_rougeL_for_task199_multinli_textual_entailment": 41.0, "eval_rougeL_for_task200_multinli_textual_entailment": 80.0, "eval_rougeL_for_task201_multinli_textual_entailment": 19.0, "eval_rougeL_for_task202_multinli_textual_entailment": 15.0, "eval_rougeL_for_task219_rocstories_title_generation": 20.7804, "eval_rougeL_for_task220_rocstories_title_generation": 95.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 90.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.55, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 19.6155, "eval_rougeL_for_task288_gigaword_title_generation": 28.6214, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.0333, "eval_rougeL_for_task329_gap_coreference_resolution": 52.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.819, "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.8065, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.0522, "eval_rougeL_for_task418_persent_title_generation": 28.0107, "eval_rougeL_for_task442_com_qa_question_rewriting": 66.7771, "eval_rougeL_for_task500_scruples_title_generation": 17.8865, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.5932, "eval_rougeL_for_task520_aquamuse_answerability_classification": 71.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.5814, "eval_rougeL_for_task602_wikitext_title_generation": 13.3095, "eval_rougeL_for_task613_liar_keyword_tagging": 35.3333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 45.1894, "eval_rougeL_for_task619_ohsumed_title_generation": 40.4526, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.6524, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 89.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 47.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.4238, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 33.8333, "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.4787, "eval_rougeL_for_task671_ambigqa_question_rewriting": 59.0846, "eval_rougeL_for_task677_ollie_data_to_text": 31.3218, "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, "eval_rougeL_for_task743_eurlex_title_generation": 36.7888, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.4663, "eval_rougeL_for_task769_qed_title_generation": 81.2073, "eval_rougeL_for_task827_copa_cause_effect_classification": 67.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 64.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 76.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 45.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 55.0, "eval_rougeL_for_task891_gap_coreference_resolution": 58.0524, "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, "eval_rougeL_for_task893_gap_coreference_resolution": 42.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.2369, "eval_rougeL_for_task970_sherliic_textual_entailment": 60.0, "eval_rougeL_for_textual_entailment": 49.7083, "eval_rougeL_for_title_generation": 35.9548, "eval_rougeL_for_word_analogy": 51.5833, "eval_runtime": 939.9946, "eval_samples_per_second": 12.67, "eval_steps_per_second": 0.793, "step": 4000 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 0.946, "step": 4500 }, { "epoch": 0.98, "eval_exact_match": 32.9891, "eval_exact_match_for_answerability_classification": 52.0769, "eval_exact_match_for_cause_effect_classification": 50.1429, "eval_exact_match_for_coreference_resolution": 43.6429, "eval_exact_match_for_data_to_text": 7.385, "eval_exact_match_for_dialogue_act_recognition": 53.1429, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 43.2, "eval_exact_match_for_overlap_extraction": 15.0, "eval_exact_match_for_question_rewriting": 3.0909, "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 56.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 3.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 10.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 42.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 30.0, "eval_exact_match_for_task050_multirc_answerability_classification": 56.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 42.0, "eval_exact_match_for_task1153_bard_word_analogy": 32.0, "eval_exact_match_for_task1154_bard_word_analogy": 19.0, "eval_exact_match_for_task1155_bard_word_analogy": 88.0, "eval_exact_match_for_task1156_bard_word_analogy": 51.0, "eval_exact_match_for_task1157_bard_word_analogy": 58.0, "eval_exact_match_for_task1158_bard_word_analogy": 40.0, "eval_exact_match_for_task1159_bard_word_analogy": 36.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 54.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 56.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 3.0, "eval_exact_match_for_task1386_anli_textual_entailment": 5.0, "eval_exact_match_for_task1387_anli_textual_entailment": 8.0, "eval_exact_match_for_task1388_cb_textual_entailment": 1.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 81.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 64.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 2.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 57.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 37.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 76.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 32.0, "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 52.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_exact_match_for_task1659_billsum_title_generation": 1.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 17.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 49.0, "eval_exact_match_for_task199_multinli_textual_entailment": 57.0, "eval_exact_match_for_task200_multinli_textual_entailment": 66.0, "eval_exact_match_for_task201_multinli_textual_entailment": 17.0, "eval_exact_match_for_task202_multinli_textual_entailment": 18.0, "eval_exact_match_for_task219_rocstories_title_generation": 3.0, "eval_exact_match_for_task220_rocstories_title_generation": 98.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 75.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 56.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 60.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 68.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 67.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 36.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 96.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 55.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 60.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 76.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 22.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 39.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 63.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 83.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 58.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 39.0, "eval_exact_match_for_task891_gap_coreference_resolution": 50.0, "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, "eval_exact_match_for_task893_gap_coreference_resolution": 45.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 69.0, "eval_exact_match_for_textual_entailment": 42.1667, "eval_exact_match_for_title_generation": 10.426, "eval_exact_match_for_word_analogy": 45.75, "eval_f1": 50.1066, "eval_f1_for_answerability_classification": 54.641, "eval_f1_for_cause_effect_classification": 66.9694, "eval_f1_for_coreference_resolution": 51.8978, "eval_f1_for_data_to_text": 52.2967, "eval_f1_for_dialogue_act_recognition": 55.4286, "eval_f1_for_grammar_error_correction": 59.7, "eval_f1_for_keyword_tagging": 57.136, "eval_f1_for_overlap_extraction": 26.6784, "eval_f1_for_question_rewriting": 68.6066, "eval_f1_for_task020_mctaco_answerability_classification": 53.0, "eval_f1_for_task033_winogrande_coreference_resolution": 58.6667, "eval_f1_for_task034_winogrande_question_rewriting": 71.8445, "eval_f1_for_task035_winogrande_question_rewriting": 86.0263, "eval_f1_for_task036_qasc_keyword_tagging": 76.4659, "eval_f1_for_task039_qasc_overlap_extraction": 35.3333, "eval_f1_for_task050_multirc_answerability_classification": 56.0, "eval_f1_for_task102_commongen_data_to_text": 54.2967, "eval_f1_for_task1152_bard_word_analogy": 42.0, "eval_f1_for_task1153_bard_word_analogy": 32.0, "eval_f1_for_task1154_bard_word_analogy": 19.0, "eval_f1_for_task1155_bard_word_analogy": 88.0, "eval_f1_for_task1156_bard_word_analogy": 51.0, "eval_f1_for_task1157_bard_word_analogy": 58.0, "eval_f1_for_task1158_bard_word_analogy": 40.0, "eval_f1_for_task1159_bard_word_analogy": 36.0, "eval_f1_for_task1161_coda_19_title_generation": 37.2792, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.9781, "eval_f1_for_task121_atomic_question_rewriting": 50.8439, "eval_f1_for_task133_winowhy_coreference_resolution": 54.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.5387, "eval_f1_for_task1344_rte_textual_entailment": 56.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.1392, "eval_f1_for_task1356_xlsum_title_generation": 24.6784, "eval_f1_for_task1358_xlsum_title_generation": 32.8659, "eval_f1_for_task1385_anli_textual_entailment": 3.0, "eval_f1_for_task1386_anli_textual_entailment": 5.0, "eval_f1_for_task1387_anli_textual_entailment": 8.0, "eval_f1_for_task1388_cb_textual_entailment": 1.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_f1_for_task1393_copa_cause_effect_classification": 81.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 64.0, "eval_f1_for_task1407_dart_data_to_text": 33.8475, "eval_f1_for_task1409_dart_data_to_text": 50.3665, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.2078, "eval_f1_for_task1439_doqa_answerability_classification": 57.0, "eval_f1_for_task1442_doqa_answerability_classification": 57.0, "eval_f1_for_task1516_imppres_textual_entailment": 37.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 34.3288, "eval_f1_for_task1554_scitail_textual_entailment": 76.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.1923, "eval_f1_for_task1562_zest_question_rewriting": 55.1199, "eval_f1_for_task1586_scifact_title_generation": 35.3887, "eval_f1_for_task1598_nyc_data_to_text": 50.8276, "eval_f1_for_task1612_sick_textual_entailment": 32.0, "eval_f1_for_task1615_sick_textual_entailment": 45.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.0212, "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_f1_for_task1631_open_pi_data_to_text": 94.9166, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_f1_for_task1659_billsum_title_generation": 35.3327, "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.1667, "eval_f1_for_task1728_web_nlg_data_to_text": 62.2571, "eval_f1_for_task190_snli_textual_entailment": 49.0, "eval_f1_for_task199_multinli_textual_entailment": 57.0, "eval_f1_for_task200_multinli_textual_entailment": 66.0, "eval_f1_for_task201_multinli_textual_entailment": 17.0, "eval_f1_for_task202_multinli_textual_entailment": 18.0, "eval_f1_for_task219_rocstories_title_generation": 16.7913, "eval_f1_for_task220_rocstories_title_generation": 98.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_f1_for_task232_iirc_answerability_classification": 0.0, "eval_f1_for_task233_iirc_answerability_classification": 0.0, "eval_f1_for_task242_tweetqa_answerability_classification": 75.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 68.4833, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 18.0235, "eval_f1_for_task288_gigaword_title_generation": 29.4067, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 25.1667, "eval_f1_for_task329_gap_coreference_resolution": 54.0, "eval_f1_for_task330_gap_coreference_resolution": 67.8857, "eval_f1_for_task349_squad2.0_answerability_classification": 60.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 84.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 89.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 28.7627, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 36.6667, "eval_f1_for_task402_grailqa_question_rewriting": 71.8718, "eval_f1_for_task418_persent_title_generation": 26.8735, "eval_f1_for_task442_com_qa_question_rewriting": 72.5266, "eval_f1_for_task500_scruples_title_generation": 20.4214, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.8471, "eval_f1_for_task520_aquamuse_answerability_classification": 96.0, "eval_f1_for_task569_recipe_nlg_title_generation": 36.3728, "eval_f1_for_task602_wikitext_title_generation": 13.8464, "eval_f1_for_task613_liar_keyword_tagging": 23.0, "eval_f1_for_task614_glucose_cause_effect_classification": 41.6896, "eval_f1_for_task619_ohsumed_title_generation": 44.8834, "eval_f1_for_task620_ohsumed_keyword_tagging": 43.6667, "eval_f1_for_task623_ohsumed_keyword_tagging": 55.0, "eval_f1_for_task640_e_snli_textual_entailment": 33.0, "eval_f1_for_task641_e_snli_textual_entailment": 60.0, "eval_f1_for_task642_e_snli_textual_entailment": 43.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 87.5476, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 33.8333, "eval_f1_for_task670_ambigqa_question_rewriting": 80.5862, "eval_f1_for_task671_ambigqa_question_rewriting": 67.7156, "eval_f1_for_task677_ollie_data_to_text": 33.6531, "eval_f1_for_task738_perspectrum_textual_entailment": 39.0, "eval_f1_for_task743_eurlex_title_generation": 39.6166, "eval_f1_for_task760_msr_sqa_data_to_text": 4.9772, "eval_f1_for_task769_qed_title_generation": 70.6746, "eval_f1_for_task827_copa_cause_effect_classification": 83.0, "eval_f1_for_task828_copa_cause_effect_classification": 58.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_f1_for_task890_gwsd_textual_entailment": 39.0, "eval_f1_for_task891_gap_coreference_resolution": 58.7, "eval_f1_for_task892_gap_coreference_resolution": 48.0, "eval_f1_for_task893_gap_coreference_resolution": 45.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_f1_for_task957_e2e_data_to_text": 50.512, "eval_f1_for_task970_sherliic_textual_entailment": 69.0, "eval_f1_for_textual_entailment": 42.1667, "eval_f1_for_title_generation": 36.4871, "eval_f1_for_word_analogy": 45.75, "eval_gen_len": 8.2826, "eval_global_step": 4500, "eval_loss": 1.1431068181991577, "eval_rouge1": 52.93, "eval_rouge1_for_answerability_classification": 54.641, "eval_rouge1_for_cause_effect_classification": 67.6319, "eval_rouge1_for_coreference_resolution": 52.2656, "eval_rouge1_for_data_to_text": 54.7719, "eval_rouge1_for_dialogue_act_recognition": 58.9159, "eval_rouge1_for_grammar_error_correction": 63.9527, "eval_rouge1_for_keyword_tagging": 61.8187, "eval_rouge1_for_overlap_extraction": 30.9939, "eval_rouge1_for_question_rewriting": 69.9957, "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.6667, "eval_rouge1_for_task034_winogrande_question_rewriting": 71.9364, "eval_rouge1_for_task035_winogrande_question_rewriting": 86.8942, "eval_rouge1_for_task036_qasc_keyword_tagging": 78.727, "eval_rouge1_for_task039_qasc_overlap_extraction": 42.6667, "eval_rouge1_for_task050_multirc_answerability_classification": 56.0, "eval_rouge1_for_task102_commongen_data_to_text": 65.2615, "eval_rouge1_for_task1152_bard_word_analogy": 42.0, "eval_rouge1_for_task1153_bard_word_analogy": 35.0, "eval_rouge1_for_task1154_bard_word_analogy": 19.0, "eval_rouge1_for_task1155_bard_word_analogy": 88.0, "eval_rouge1_for_task1156_bard_word_analogy": 52.0, "eval_rouge1_for_task1157_bard_word_analogy": 58.0, "eval_rouge1_for_task1158_bard_word_analogy": 40.0, "eval_rouge1_for_task1159_bard_word_analogy": 36.0, "eval_rouge1_for_task1161_coda_19_title_generation": 39.9807, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2194, "eval_rouge1_for_task121_atomic_question_rewriting": 52.9863, "eval_rouge1_for_task133_winowhy_coreference_resolution": 54.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.1669, "eval_rouge1_for_task1344_rte_textual_entailment": 56.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 43.057, "eval_rouge1_for_task1356_xlsum_title_generation": 28.137, "eval_rouge1_for_task1358_xlsum_title_generation": 37.357, "eval_rouge1_for_task1385_anli_textual_entailment": 35.0, "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 81.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 69.0778, "eval_rouge1_for_task1407_dart_data_to_text": 34.0484, "eval_rouge1_for_task1409_dart_data_to_text": 51.2155, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.658, "eval_rouge1_for_task1439_doqa_answerability_classification": 57.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 37.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 37.856, "eval_rouge1_for_task1554_scitail_textual_entailment": 76.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.2475, "eval_rouge1_for_task1562_zest_question_rewriting": 57.7511, "eval_rouge1_for_task1586_scifact_title_generation": 40.2491, "eval_rouge1_for_task1598_nyc_data_to_text": 51.3212, "eval_rouge1_for_task1612_sick_textual_entailment": 32.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2995, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.2448, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_rouge1_for_task1659_billsum_title_generation": 37.4252, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.1667, "eval_rouge1_for_task1728_web_nlg_data_to_text": 64.0033, "eval_rouge1_for_task190_snli_textual_entailment": 49.0, "eval_rouge1_for_task199_multinli_textual_entailment": 57.0, "eval_rouge1_for_task200_multinli_textual_entailment": 66.0, "eval_rouge1_for_task201_multinli_textual_entailment": 17.0, "eval_rouge1_for_task202_multinli_textual_entailment": 18.0, "eval_rouge1_for_task219_rocstories_title_generation": 20.8938, "eval_rouge1_for_task220_rocstories_title_generation": 98.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 75.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 69.2333, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 19.3212, "eval_rouge1_for_task288_gigaword_title_generation": 32.437, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 25.6667, "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.9857, "eval_rouge1_for_task349_squad2.0_answerability_classification": 60.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 84.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 89.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.5389, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 39.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 73.4259, "eval_rouge1_for_task418_persent_title_generation": 30.6762, "eval_rouge1_for_task442_com_qa_question_rewriting": 75.8486, "eval_rouge1_for_task500_scruples_title_generation": 22.9637, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.5136, "eval_rouge1_for_task520_aquamuse_answerability_classification": 96.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.8332, "eval_rouge1_for_task602_wikitext_title_generation": 14.6518, "eval_rouge1_for_task613_liar_keyword_tagging": 36.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 46.5512, "eval_rouge1_for_task619_ohsumed_title_generation": 47.7622, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 50.819, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 55.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 39.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 60.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 88.0476, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 34.3, "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.2943, "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.2397, "eval_rouge1_for_task677_ollie_data_to_text": 36.4075, "eval_rouge1_for_task738_perspectrum_textual_entailment": 42.0, "eval_rouge1_for_task743_eurlex_title_generation": 41.2229, "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.2901, "eval_rouge1_for_task769_qed_title_generation": 71.2662, "eval_rouge1_for_task827_copa_cause_effect_classification": 83.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 58.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 39.0, "eval_rouge1_for_task891_gap_coreference_resolution": 59.0333, "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, "eval_rouge1_for_task893_gap_coreference_resolution": 45.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rouge1_for_task957_e2e_data_to_text": 53.5386, "eval_rouge1_for_task970_sherliic_textual_entailment": 69.0, "eval_rouge1_for_textual_entailment": 49.6111, "eval_rouge1_for_title_generation": 38.9041, "eval_rouge1_for_word_analogy": 46.25, "eval_rougeL": 51.6039, "eval_rougeL_for_answerability_classification": 54.641, "eval_rougeL_for_cause_effect_classification": 66.8821, "eval_rougeL_for_coreference_resolution": 52.2656, "eval_rougeL_for_data_to_text": 47.5211, "eval_rougeL_for_dialogue_act_recognition": 58.9159, "eval_rougeL_for_grammar_error_correction": 63.2655, "eval_rougeL_for_keyword_tagging": 61.4587, "eval_rougeL_for_overlap_extraction": 30.9662, "eval_rougeL_for_question_rewriting": 66.6401, "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.6667, "eval_rougeL_for_task034_winogrande_question_rewriting": 70.6132, "eval_rougeL_for_task035_winogrande_question_rewriting": 85.8466, "eval_rougeL_for_task036_qasc_keyword_tagging": 78.1603, "eval_rougeL_for_task039_qasc_overlap_extraction": 42.6667, "eval_rougeL_for_task050_multirc_answerability_classification": 56.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.4337, "eval_rougeL_for_task1152_bard_word_analogy": 42.0, "eval_rougeL_for_task1153_bard_word_analogy": 35.0, "eval_rougeL_for_task1154_bard_word_analogy": 19.0, "eval_rougeL_for_task1155_bard_word_analogy": 88.0, "eval_rougeL_for_task1156_bard_word_analogy": 52.0, "eval_rougeL_for_task1157_bard_word_analogy": 58.0, "eval_rougeL_for_task1158_bard_word_analogy": 40.0, "eval_rougeL_for_task1159_bard_word_analogy": 36.0, "eval_rougeL_for_task1161_coda_19_title_generation": 33.3192, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.6433, "eval_rougeL_for_task121_atomic_question_rewriting": 47.9416, "eval_rougeL_for_task133_winowhy_coreference_resolution": 54.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.6332, "eval_rougeL_for_task1344_rte_textual_entailment": 56.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.0965, "eval_rougeL_for_task1356_xlsum_title_generation": 23.7127, "eval_rougeL_for_task1358_xlsum_title_generation": 32.6212, "eval_rougeL_for_task1385_anli_textual_entailment": 35.0, "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 81.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 69.0778, "eval_rougeL_for_task1407_dart_data_to_text": 30.9982, "eval_rougeL_for_task1409_dart_data_to_text": 43.4943, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.0225, "eval_rougeL_for_task1439_doqa_answerability_classification": 57.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 37.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 35.4467, "eval_rougeL_for_task1554_scitail_textual_entailment": 76.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.5084, "eval_rougeL_for_task1562_zest_question_rewriting": 51.2749, "eval_rougeL_for_task1586_scifact_title_generation": 33.1288, "eval_rougeL_for_task1598_nyc_data_to_text": 39.5457, "eval_rougeL_for_task1612_sick_textual_entailment": 32.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.9828, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 92.866, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 68.0, "eval_rougeL_for_task1659_billsum_title_generation": 31.6802, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.1667, "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.2253, "eval_rougeL_for_task190_snli_textual_entailment": 49.0, "eval_rougeL_for_task199_multinli_textual_entailment": 57.0, "eval_rougeL_for_task200_multinli_textual_entailment": 66.0, "eval_rougeL_for_task201_multinli_textual_entailment": 17.0, "eval_rougeL_for_task202_multinli_textual_entailment": 18.0, "eval_rougeL_for_task219_rocstories_title_generation": 20.8938, "eval_rougeL_for_task220_rocstories_title_generation": 98.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 75.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 69.2333, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 19.2657, "eval_rougeL_for_task288_gigaword_title_generation": 28.0496, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 25.6667, "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.9857, "eval_rougeL_for_task349_squad2.0_answerability_classification": 60.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 84.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 89.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.9601, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 39.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 61.5451, "eval_rougeL_for_task418_persent_title_generation": 27.3056, "eval_rougeL_for_task442_com_qa_question_rewriting": 71.0572, "eval_rougeL_for_task500_scruples_title_generation": 21.3818, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.845, "eval_rougeL_for_task520_aquamuse_answerability_classification": 96.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.9046, "eval_rougeL_for_task602_wikitext_title_generation": 14.6518, "eval_rougeL_for_task613_liar_keyword_tagging": 36.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 41.8815, "eval_rougeL_for_task619_ohsumed_title_generation": 41.6933, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 49.5857, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 55.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 39.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 60.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 88.0476, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 34.3, "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.7048, "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.3355, "eval_rougeL_for_task677_ollie_data_to_text": 30.8777, "eval_rougeL_for_task738_perspectrum_textual_entailment": 42.0, "eval_rougeL_for_task743_eurlex_title_generation": 37.2016, "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.5444, "eval_rougeL_for_task769_qed_title_generation": 71.2662, "eval_rougeL_for_task827_copa_cause_effect_classification": 83.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 58.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 39.0, "eval_rougeL_for_task891_gap_coreference_resolution": 59.0333, "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, "eval_rougeL_for_task893_gap_coreference_resolution": 45.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rougeL_for_task957_e2e_data_to_text": 40.9021, "eval_rougeL_for_task970_sherliic_textual_entailment": 69.0, "eval_rougeL_for_textual_entailment": 49.6111, "eval_rougeL_for_title_generation": 35.9524, "eval_rougeL_for_word_analogy": 46.25, "eval_runtime": 780.6771, "eval_samples_per_second": 15.256, "eval_steps_per_second": 0.954, "step": 4500 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 0.7881, "step": 5000 }, { "epoch": 1.09, "eval_exact_match": 35.5919, "eval_exact_match_for_answerability_classification": 59.3846, "eval_exact_match_for_cause_effect_classification": 50.8571, "eval_exact_match_for_coreference_resolution": 46.7143, "eval_exact_match_for_data_to_text": 7.2639, "eval_exact_match_for_dialogue_act_recognition": 58.0, "eval_exact_match_for_grammar_error_correction": 6.0, "eval_exact_match_for_keyword_tagging": 46.6, "eval_exact_match_for_overlap_extraction": 19.0, "eval_exact_match_for_question_rewriting": 2.1818, "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 55.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 5.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 19.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 37.0, "eval_exact_match_for_task050_multirc_answerability_classification": 75.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 43.0, "eval_exact_match_for_task1153_bard_word_analogy": 37.0, "eval_exact_match_for_task1154_bard_word_analogy": 29.0, "eval_exact_match_for_task1155_bard_word_analogy": 79.0, "eval_exact_match_for_task1156_bard_word_analogy": 59.0, "eval_exact_match_for_task1157_bard_word_analogy": 64.0, "eval_exact_match_for_task1158_bard_word_analogy": 40.0, "eval_exact_match_for_task1159_bard_word_analogy": 32.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 65.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 60.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 16.0, "eval_exact_match_for_task1386_anli_textual_entailment": 24.0, "eval_exact_match_for_task1387_anli_textual_entailment": 24.0, "eval_exact_match_for_task1388_cb_textual_entailment": 18.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 71.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 83.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 36.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 72.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 58.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 36.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 76.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 12.0, "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 53.0, "eval_exact_match_for_task1615_sick_textual_entailment": 57.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 68.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 54.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 70.0, "eval_exact_match_for_task1659_billsum_title_generation": 1.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 24.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 50.0, "eval_exact_match_for_task199_multinli_textual_entailment": 42.0, "eval_exact_match_for_task200_multinli_textual_entailment": 90.0, "eval_exact_match_for_task201_multinli_textual_entailment": 18.0, "eval_exact_match_for_task202_multinli_textual_entailment": 8.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 98.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_exact_match_for_task232_iirc_answerability_classification": 19.0, "eval_exact_match_for_task233_iirc_answerability_classification": 25.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 94.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 58.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 11.0, "eval_exact_match_for_task329_gap_coreference_resolution": 52.0, "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 60.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 73.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 65.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 61.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 90.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 90.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 39.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 47.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 21.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 33.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 73.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 86.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 61.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 45.0, "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, "eval_exact_match_for_task893_gap_coreference_resolution": 56.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 77.0, "eval_exact_match_for_textual_entailment": 46.25, "eval_exact_match_for_title_generation": 10.7063, "eval_exact_match_for_word_analogy": 47.875, "eval_f1": 53.4706, "eval_f1_for_answerability_classification": 61.9487, "eval_f1_for_cause_effect_classification": 67.6678, "eval_f1_for_coreference_resolution": 55.6713, "eval_f1_for_data_to_text": 53.5694, "eval_f1_for_dialogue_act_recognition": 59.9286, "eval_f1_for_grammar_error_correction": 62.0603, "eval_f1_for_keyword_tagging": 61.8222, "eval_f1_for_overlap_extraction": 44.2158, "eval_f1_for_question_rewriting": 68.8878, "eval_f1_for_task020_mctaco_answerability_classification": 55.0, "eval_f1_for_task033_winogrande_coreference_resolution": 58.3333, "eval_f1_for_task034_winogrande_question_rewriting": 71.5688, "eval_f1_for_task035_winogrande_question_rewriting": 87.5608, "eval_f1_for_task036_qasc_keyword_tagging": 62.2445, "eval_f1_for_task039_qasc_overlap_extraction": 41.0, "eval_f1_for_task050_multirc_answerability_classification": 75.0, "eval_f1_for_task102_commongen_data_to_text": 55.5361, "eval_f1_for_task1152_bard_word_analogy": 43.0, "eval_f1_for_task1153_bard_word_analogy": 37.0, "eval_f1_for_task1154_bard_word_analogy": 29.0, "eval_f1_for_task1155_bard_word_analogy": 79.0, "eval_f1_for_task1156_bard_word_analogy": 59.0, "eval_f1_for_task1157_bard_word_analogy": 64.0, "eval_f1_for_task1158_bard_word_analogy": 40.0, "eval_f1_for_task1159_bard_word_analogy": 32.0, "eval_f1_for_task1161_coda_19_title_generation": 38.0813, "eval_f1_for_task1195_disfl_qa_question_rewriting": 83.0908, "eval_f1_for_task121_atomic_question_rewriting": 51.8991, "eval_f1_for_task133_winowhy_coreference_resolution": 65.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.9368, "eval_f1_for_task1344_rte_textual_entailment": 60.0, "eval_f1_for_task1345_qqp_question_rewriting": 40.7303, "eval_f1_for_task1356_xlsum_title_generation": 22.7539, "eval_f1_for_task1358_xlsum_title_generation": 33.3508, "eval_f1_for_task1385_anli_textual_entailment": 16.0, "eval_f1_for_task1386_anli_textual_entailment": 24.0, "eval_f1_for_task1387_anli_textual_entailment": 24.0, "eval_f1_for_task1388_cb_textual_entailment": 18.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 71.0, "eval_f1_for_task1393_copa_cause_effect_classification": 83.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_f1_for_task1407_dart_data_to_text": 36.7484, "eval_f1_for_task1409_dart_data_to_text": 57.3007, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.0962, "eval_f1_for_task1439_doqa_answerability_classification": 50.0, "eval_f1_for_task1442_doqa_answerability_classification": 57.0, "eval_f1_for_task1516_imppres_textual_entailment": 36.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 72.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 58.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 36.0, "eval_f1_for_task1540_peer_read_title_generation": 37.3066, "eval_f1_for_task1554_scitail_textual_entailment": 76.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.0243, "eval_f1_for_task1562_zest_question_rewriting": 53.3264, "eval_f1_for_task1586_scifact_title_generation": 37.2215, "eval_f1_for_task1598_nyc_data_to_text": 51.4712, "eval_f1_for_task1612_sick_textual_entailment": 53.0, "eval_f1_for_task1615_sick_textual_entailment": 57.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.1068, "eval_f1_for_task1624_disfl_qa_answerability_classification": 68.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.1871, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 70.0, "eval_f1_for_task1659_billsum_title_generation": 34.3602, "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.7381, "eval_f1_for_task1728_web_nlg_data_to_text": 66.8204, "eval_f1_for_task190_snli_textual_entailment": 50.0, "eval_f1_for_task199_multinli_textual_entailment": 42.0, "eval_f1_for_task200_multinli_textual_entailment": 90.0, "eval_f1_for_task201_multinli_textual_entailment": 18.0, "eval_f1_for_task202_multinli_textual_entailment": 8.0, "eval_f1_for_task219_rocstories_title_generation": 19.4643, "eval_f1_for_task220_rocstories_title_generation": 98.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_f1_for_task232_iirc_answerability_classification": 19.0, "eval_f1_for_task233_iirc_answerability_classification": 25.0, "eval_f1_for_task242_tweetqa_answerability_classification": 94.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 68.2333, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 47.4315, "eval_f1_for_task288_gigaword_title_generation": 28.6204, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 33.5333, "eval_f1_for_task329_gap_coreference_resolution": 52.0, "eval_f1_for_task330_gap_coreference_resolution": 65.1857, "eval_f1_for_task349_squad2.0_answerability_classification": 60.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 88.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 87.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 31.6543, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.8333, "eval_f1_for_task402_grailqa_question_rewriting": 73.7294, "eval_f1_for_task418_persent_title_generation": 30.0215, "eval_f1_for_task442_com_qa_question_rewriting": 72.7769, "eval_f1_for_task500_scruples_title_generation": 20.1633, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.0541, "eval_f1_for_task520_aquamuse_answerability_classification": 90.0, "eval_f1_for_task569_recipe_nlg_title_generation": 36.3183, "eval_f1_for_task602_wikitext_title_generation": 13.7322, "eval_f1_for_task613_liar_keyword_tagging": 24.0, "eval_f1_for_task614_glucose_cause_effect_classification": 36.6867, "eval_f1_for_task619_ohsumed_title_generation": 46.7253, "eval_f1_for_task620_ohsumed_keyword_tagging": 38.8, "eval_f1_for_task623_ohsumed_keyword_tagging": 90.0, "eval_f1_for_task640_e_snli_textual_entailment": 39.0, "eval_f1_for_task641_e_snli_textual_entailment": 47.0, "eval_f1_for_task642_e_snli_textual_entailment": 42.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0667, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 46.0071, "eval_f1_for_task670_ambigqa_question_rewriting": 81.7579, "eval_f1_for_task671_ambigqa_question_rewriting": 62.2191, "eval_f1_for_task677_ollie_data_to_text": 28.2783, "eval_f1_for_task738_perspectrum_textual_entailment": 33.0, "eval_f1_for_task743_eurlex_title_generation": 37.7464, "eval_f1_for_task760_msr_sqa_data_to_text": 5.8144, "eval_f1_for_task769_qed_title_generation": 85.8746, "eval_f1_for_task827_copa_cause_effect_classification": 86.0, "eval_f1_for_task828_copa_cause_effect_classification": 61.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_f1_for_task890_gwsd_textual_entailment": 45.0, "eval_f1_for_task891_gap_coreference_resolution": 62.5333, "eval_f1_for_task892_gap_coreference_resolution": 42.0, "eval_f1_for_task893_gap_coreference_resolution": 56.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_f1_for_task957_e2e_data_to_text": 49.6294, "eval_f1_for_task970_sherliic_textual_entailment": 77.0, "eval_f1_for_textual_entailment": 46.25, "eval_f1_for_title_generation": 37.8663, "eval_f1_for_word_analogy": 47.875, "eval_gen_len": 9.2936, "eval_global_step": 5000, "eval_loss": 1.176875352859497, "eval_rouge1": 55.8618, "eval_rouge1_for_answerability_classification": 61.9487, "eval_rouge1_for_cause_effect_classification": 68.4126, "eval_rouge1_for_coreference_resolution": 56.3015, "eval_rouge1_for_data_to_text": 56.5579, "eval_rouge1_for_dialogue_act_recognition": 62.1905, "eval_rouge1_for_grammar_error_correction": 66.8628, "eval_rouge1_for_keyword_tagging": 67.3275, "eval_rouge1_for_overlap_extraction": 47.6942, "eval_rouge1_for_question_rewriting": 70.3165, "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 71.6269, "eval_rouge1_for_task035_winogrande_question_rewriting": 88.1556, "eval_rouge1_for_task036_qasc_keyword_tagging": 69.2374, "eval_rouge1_for_task039_qasc_overlap_extraction": 47.0, "eval_rouge1_for_task050_multirc_answerability_classification": 75.0, "eval_rouge1_for_task102_commongen_data_to_text": 67.6891, "eval_rouge1_for_task1152_bard_word_analogy": 43.0, "eval_rouge1_for_task1153_bard_word_analogy": 39.0, "eval_rouge1_for_task1154_bard_word_analogy": 29.0, "eval_rouge1_for_task1155_bard_word_analogy": 79.0, "eval_rouge1_for_task1156_bard_word_analogy": 59.0, "eval_rouge1_for_task1157_bard_word_analogy": 64.0, "eval_rouge1_for_task1158_bard_word_analogy": 40.0, "eval_rouge1_for_task1159_bard_word_analogy": 32.0, "eval_rouge1_for_task1161_coda_19_title_generation": 41.798, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.302, "eval_rouge1_for_task121_atomic_question_rewriting": 54.0929, "eval_rouge1_for_task133_winowhy_coreference_resolution": 65.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.67, "eval_rouge1_for_task1344_rte_textual_entailment": 60.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 43.8855, "eval_rouge1_for_task1356_xlsum_title_generation": 26.2771, "eval_rouge1_for_task1358_xlsum_title_generation": 38.1057, "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, "eval_rouge1_for_task1388_cb_textual_entailment": 32.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 71.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 83.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.5, "eval_rouge1_for_task1407_dart_data_to_text": 38.0392, "eval_rouge1_for_task1409_dart_data_to_text": 58.8212, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.8451, "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 36.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 72.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 55.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 58.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 36.0, "eval_rouge1_for_task1540_peer_read_title_generation": 41.2851, "eval_rouge1_for_task1554_scitail_textual_entailment": 76.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8804, "eval_rouge1_for_task1562_zest_question_rewriting": 56.0189, "eval_rouge1_for_task1586_scifact_title_generation": 41.2943, "eval_rouge1_for_task1598_nyc_data_to_text": 52.6696, "eval_rouge1_for_task1612_sick_textual_entailment": 53.0, "eval_rouge1_for_task1615_sick_textual_entailment": 85.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.3088, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 68.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.381, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 70.0, "eval_rouge1_for_task1659_billsum_title_generation": 36.3059, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.7381, "eval_rouge1_for_task1728_web_nlg_data_to_text": 68.5502, "eval_rouge1_for_task190_snli_textual_entailment": 50.0, "eval_rouge1_for_task199_multinli_textual_entailment": 42.0, "eval_rouge1_for_task200_multinli_textual_entailment": 90.0, "eval_rouge1_for_task201_multinli_textual_entailment": 18.0, "eval_rouge1_for_task202_multinli_textual_entailment": 8.0, "eval_rouge1_for_task219_rocstories_title_generation": 23.3398, "eval_rouge1_for_task220_rocstories_title_generation": 98.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_rouge1_for_task232_iirc_answerability_classification": 19.0, "eval_rouge1_for_task233_iirc_answerability_classification": 25.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 94.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 68.9, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 48.3885, "eval_rouge1_for_task288_gigaword_title_generation": 31.3832, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 34.6, "eval_rouge1_for_task329_gap_coreference_resolution": 52.0, "eval_rouge1_for_task330_gap_coreference_resolution": 65.3429, "eval_rouge1_for_task349_squad2.0_answerability_classification": 60.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 88.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.22, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 75.6669, "eval_rouge1_for_task418_persent_title_generation": 33.247, "eval_rouge1_for_task442_com_qa_question_rewriting": 76.0259, "eval_rouge1_for_task500_scruples_title_generation": 22.0698, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.3234, "eval_rouge1_for_task520_aquamuse_answerability_classification": 90.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 38.1412, "eval_rouge1_for_task602_wikitext_title_generation": 14.663, "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 41.3349, "eval_rouge1_for_task619_ohsumed_title_generation": 49.3055, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 46.0, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 90.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 39.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 47.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.5667, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 46.3738, "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.3875, "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.0107, "eval_rouge1_for_task677_ollie_data_to_text": 31.3286, "eval_rouge1_for_task738_perspectrum_textual_entailment": 78.0, "eval_rouge1_for_task743_eurlex_title_generation": 39.5581, "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.3643, "eval_rouge1_for_task769_qed_title_generation": 85.8746, "eval_rouge1_for_task827_copa_cause_effect_classification": 86.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 61.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 52.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 45.0, "eval_rouge1_for_task891_gap_coreference_resolution": 62.7667, "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, "eval_rouge1_for_task893_gap_coreference_resolution": 56.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rouge1_for_task957_e2e_data_to_text": 53.0349, "eval_rouge1_for_task970_sherliic_textual_entailment": 77.0, "eval_rouge1_for_textual_entailment": 51.5278, "eval_rouge1_for_title_generation": 40.2072, "eval_rouge1_for_word_analogy": 48.125, "eval_rougeL": 54.3825, "eval_rougeL_for_answerability_classification": 61.9487, "eval_rougeL_for_cause_effect_classification": 67.8048, "eval_rougeL_for_coreference_resolution": 56.3015, "eval_rougeL_for_data_to_text": 48.1034, "eval_rougeL_for_dialogue_act_recognition": 62.1905, "eval_rougeL_for_grammar_error_correction": 65.7425, "eval_rougeL_for_keyword_tagging": 66.6022, "eval_rougeL_for_overlap_extraction": 46.6992, "eval_rougeL_for_question_rewriting": 66.9184, "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 70.9799, "eval_rougeL_for_task035_winogrande_question_rewriting": 87.3353, "eval_rougeL_for_task036_qasc_keyword_tagging": 67.5942, "eval_rougeL_for_task039_qasc_overlap_extraction": 47.0, "eval_rougeL_for_task050_multirc_answerability_classification": 75.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.5163, "eval_rougeL_for_task1152_bard_word_analogy": 43.0, "eval_rougeL_for_task1153_bard_word_analogy": 39.0, "eval_rougeL_for_task1154_bard_word_analogy": 29.0, "eval_rougeL_for_task1155_bard_word_analogy": 79.0, "eval_rougeL_for_task1156_bard_word_analogy": 59.0, "eval_rougeL_for_task1157_bard_word_analogy": 64.0, "eval_rougeL_for_task1158_bard_word_analogy": 40.0, "eval_rougeL_for_task1159_bard_word_analogy": 32.0, "eval_rougeL_for_task1161_coda_19_title_generation": 35.0257, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 82.487, "eval_rougeL_for_task121_atomic_question_rewriting": 48.2484, "eval_rougeL_for_task133_winowhy_coreference_resolution": 65.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.106, "eval_rougeL_for_task1344_rte_textual_entailment": 60.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8782, "eval_rougeL_for_task1356_xlsum_title_generation": 23.02, "eval_rougeL_for_task1358_xlsum_title_generation": 32.3141, "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, "eval_rougeL_for_task1388_cb_textual_entailment": 32.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 71.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 83.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.5, "eval_rougeL_for_task1407_dart_data_to_text": 32.9729, "eval_rougeL_for_task1409_dart_data_to_text": 47.2712, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.4694, "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 36.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 72.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 55.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 58.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 36.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.4945, "eval_rougeL_for_task1554_scitail_textual_entailment": 76.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0155, "eval_rougeL_for_task1562_zest_question_rewriting": 49.5683, "eval_rougeL_for_task1586_scifact_title_generation": 34.3436, "eval_rougeL_for_task1598_nyc_data_to_text": 40.7632, "eval_rougeL_for_task1612_sick_textual_entailment": 53.0, "eval_rougeL_for_task1615_sick_textual_entailment": 85.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.2385, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 68.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 93.1347, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 70.0, "eval_rougeL_for_task1659_billsum_title_generation": 30.7011, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.7381, "eval_rougeL_for_task1728_web_nlg_data_to_text": 58.7091, "eval_rougeL_for_task190_snli_textual_entailment": 50.0, "eval_rougeL_for_task199_multinli_textual_entailment": 42.0, "eval_rougeL_for_task200_multinli_textual_entailment": 90.0, "eval_rougeL_for_task201_multinli_textual_entailment": 18.0, "eval_rougeL_for_task202_multinli_textual_entailment": 8.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.3398, "eval_rougeL_for_task220_rocstories_title_generation": 98.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, "eval_rougeL_for_task232_iirc_answerability_classification": 19.0, "eval_rougeL_for_task233_iirc_answerability_classification": 25.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 94.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 68.9, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 46.3985, "eval_rougeL_for_task288_gigaword_title_generation": 27.9879, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 34.6, "eval_rougeL_for_task329_gap_coreference_resolution": 52.0, "eval_rougeL_for_task330_gap_coreference_resolution": 65.3429, "eval_rougeL_for_task349_squad2.0_answerability_classification": 60.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 88.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.3681, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 62.8613, "eval_rougeL_for_task418_persent_title_generation": 29.0277, "eval_rougeL_for_task442_com_qa_question_rewriting": 71.8909, "eval_rougeL_for_task500_scruples_title_generation": 20.0736, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.2147, "eval_rougeL_for_task520_aquamuse_answerability_classification": 90.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.5238, "eval_rougeL_for_task602_wikitext_title_generation": 14.5748, "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.9323, "eval_rougeL_for_task619_ohsumed_title_generation": 42.4314, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.0167, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 90.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 39.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 47.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.5667, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 46.3738, "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.7801, "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.8345, "eval_rougeL_for_task677_ollie_data_to_text": 26.2533, "eval_rougeL_for_task738_perspectrum_textual_entailment": 78.0, "eval_rougeL_for_task743_eurlex_title_generation": 34.7101, "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.5196, "eval_rougeL_for_task769_qed_title_generation": 85.8746, "eval_rougeL_for_task827_copa_cause_effect_classification": 86.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 61.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 52.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 45.0, "eval_rougeL_for_task891_gap_coreference_resolution": 62.7667, "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, "eval_rougeL_for_task893_gap_coreference_resolution": 56.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rougeL_for_task957_e2e_data_to_text": 40.2784, "eval_rougeL_for_task970_sherliic_textual_entailment": 77.0, "eval_rougeL_for_textual_entailment": 51.5278, "eval_rougeL_for_title_generation": 37.0197, "eval_rougeL_for_word_analogy": 48.125, "eval_runtime": 879.1476, "eval_samples_per_second": 13.547, "eval_steps_per_second": 0.847, "step": 5000 }, { "epoch": 1.09, "step": 5000, "total_flos": 6.183247277184778e+17, "train_loss": 1.0100821884155273, "train_runtime": 34606.9164, "train_samples_per_second": 2.312, "train_steps_per_second": 0.144 } ], "max_steps": 5000, "num_train_epochs": 2, "total_flos": 6.183247277184778e+17, "trial_name": null, "trial_params": null }