diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7697 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0926472194908774, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 5.9395, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 0.1511, + "eval_exact_match_for_answerability_classification": 0.0, + "eval_exact_match_for_cause_effect_classification": 0.0, + "eval_exact_match_for_coreference_resolution": 0.0, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 0.0, + "eval_exact_match_for_grammar_error_correction": 5.0, + "eval_exact_match_for_keyword_tagging": 0.0, + "eval_exact_match_for_overlap_extraction": 0.0, + "eval_exact_match_for_question_rewriting": 0.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 0.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 0.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 0.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 0.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 0.0, + "eval_exact_match_for_task1155_bard_word_analogy": 0.0, + "eval_exact_match_for_task1156_bard_word_analogy": 0.0, + "eval_exact_match_for_task1157_bard_word_analogy": 0.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 0.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 0.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 0.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 10.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 0.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 0.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 0.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 0.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 0.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 0.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 0.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 0.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 0.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 0.0, + "eval_exact_match_for_title_generation": 0.0, + "eval_exact_match_for_word_analogy": 0.0, + "eval_f1": 11.9591, + "eval_f1_for_answerability_classification": 2.662, + "eval_f1_for_cause_effect_classification": 11.1366, + "eval_f1_for_coreference_resolution": 5.9022, + "eval_f1_for_data_to_text": 33.4191, + "eval_f1_for_dialogue_act_recognition": 3.285, + "eval_f1_for_grammar_error_correction": 50.5582, + "eval_f1_for_keyword_tagging": 11.6265, + "eval_f1_for_overlap_extraction": 15.3828, + "eval_f1_for_question_rewriting": 41.2187, + "eval_f1_for_task020_mctaco_answerability_classification": 3.2003, + "eval_f1_for_task033_winogrande_coreference_resolution": 4.2676, + "eval_f1_for_task034_winogrande_question_rewriting": 42.8072, + "eval_f1_for_task035_winogrande_question_rewriting": 33.0497, + "eval_f1_for_task036_qasc_keyword_tagging": 28.8602, + "eval_f1_for_task039_qasc_overlap_extraction": 1.4765, + "eval_f1_for_task050_multirc_answerability_classification": 1.75, + "eval_f1_for_task102_commongen_data_to_text": 33.1884, + "eval_f1_for_task1152_bard_word_analogy": 0.7359, + "eval_f1_for_task1153_bard_word_analogy": 0.1429, + "eval_f1_for_task1154_bard_word_analogy": 0.3402, + "eval_f1_for_task1155_bard_word_analogy": 6.5604, + "eval_f1_for_task1156_bard_word_analogy": 3.5375, + "eval_f1_for_task1157_bard_word_analogy": 2.4315, + "eval_f1_for_task1158_bard_word_analogy": 0.1227, + "eval_f1_for_task1159_bard_word_analogy": 1.6096, + "eval_f1_for_task1161_coda_19_title_generation": 17.6296, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 63.1777, + "eval_f1_for_task121_atomic_question_rewriting": 16.4897, + "eval_f1_for_task133_winowhy_coreference_resolution": 2.357, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 5.0307, + "eval_f1_for_task1344_rte_textual_entailment": 1.4955, + "eval_f1_for_task1345_qqp_question_rewriting": 25.2237, + "eval_f1_for_task1356_xlsum_title_generation": 6.613, + "eval_f1_for_task1358_xlsum_title_generation": 15.7524, + "eval_f1_for_task1385_anli_textual_entailment": 6.0267, + "eval_f1_for_task1386_anli_textual_entailment": 2.453, + "eval_f1_for_task1387_anli_textual_entailment": 2.8466, + "eval_f1_for_task1388_cb_textual_entailment": 5.952, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 3.5378, + "eval_f1_for_task1391_winogrande_coreference_resolution": 5.3769, + "eval_f1_for_task1393_copa_cause_effect_classification": 1.9281, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 0.7871, + "eval_f1_for_task1407_dart_data_to_text": 19.3827, + "eval_f1_for_task1409_dart_data_to_text": 24.5944, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.3056, + "eval_f1_for_task1439_doqa_answerability_classification": 1.0859, + "eval_f1_for_task1442_doqa_answerability_classification": 0.5939, + "eval_f1_for_task1516_imppres_textual_entailment": 1.8128, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 2.3408, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 1.1853, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 3.9203, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 3.9878, + "eval_f1_for_task1540_peer_read_title_generation": 6.1304, + "eval_f1_for_task1554_scitail_textual_entailment": 2.4816, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 71.8108, + "eval_f1_for_task1562_zest_question_rewriting": 19.4451, + "eval_f1_for_task1586_scifact_title_generation": 15.9579, + "eval_f1_for_task1598_nyc_data_to_text": 40.735, + "eval_f1_for_task1612_sick_textual_entailment": 1.6474, + "eval_f1_for_task1615_sick_textual_entailment": 1.1769, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 68.4425, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 1.487, + "eval_f1_for_task1631_open_pi_data_to_text": 59.4655, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 1.2256, + "eval_f1_for_task1659_billsum_title_generation": 17.1879, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 8.4291, + "eval_f1_for_task1728_web_nlg_data_to_text": 35.0972, + "eval_f1_for_task190_snli_textual_entailment": 0.7483, + "eval_f1_for_task199_multinli_textual_entailment": 3.453, + "eval_f1_for_task200_multinli_textual_entailment": 1.2186, + "eval_f1_for_task201_multinli_textual_entailment": 0.6632, + "eval_f1_for_task202_multinli_textual_entailment": 3.6899, + "eval_f1_for_task219_rocstories_title_generation": 6.3674, + "eval_f1_for_task220_rocstories_title_generation": 4.9488, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.0348, + "eval_f1_for_task232_iirc_answerability_classification": 2.3436, + "eval_f1_for_task233_iirc_answerability_classification": 2.0283, + "eval_f1_for_task242_tweetqa_answerability_classification": 4.2501, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 13.7873, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 29.2891, + "eval_f1_for_task288_gigaword_title_generation": 12.7731, + "eval_f1_for_task290_tellmewhy_answerability_classification": 11.8424, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.1503, + "eval_f1_for_task329_gap_coreference_resolution": 3.8855, + "eval_f1_for_task330_gap_coreference_resolution": 11.5722, + "eval_f1_for_task349_squad2.0_answerability_classification": 1.2724, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 6.9229, + "eval_f1_for_task391_cod3s_cause_effect_classification": 8.9327, + "eval_f1_for_task392_cod3s_cause_effect_classification": 7.8976, + "eval_f1_for_task393_cod3s_cause_effect_classification": 21.8259, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.3319, + "eval_f1_for_task402_grailqa_question_rewriting": 25.187, + "eval_f1_for_task418_persent_title_generation": 14.8523, + "eval_f1_for_task442_com_qa_question_rewriting": 59.2076, + "eval_f1_for_task500_scruples_title_generation": 11.5076, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 16.1998, + "eval_f1_for_task520_aquamuse_answerability_classification": 1.4911, + "eval_f1_for_task569_recipe_nlg_title_generation": 6.8146, + "eval_f1_for_task602_wikitext_title_generation": 5.0499, + "eval_f1_for_task613_liar_keyword_tagging": 2.1098, + "eval_f1_for_task614_glucose_cause_effect_classification": 30.4996, + "eval_f1_for_task619_ohsumed_title_generation": 17.2666, + "eval_f1_for_task620_ohsumed_keyword_tagging": 2.8354, + "eval_f1_for_task623_ohsumed_keyword_tagging": 1.7299, + "eval_f1_for_task640_e_snli_textual_entailment": 4.2833, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 3.5109, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 22.597, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.749, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.6275, + "eval_f1_for_task671_ambigqa_question_rewriting": 25.7475, + "eval_f1_for_task677_ollie_data_to_text": 26.5582, + "eval_f1_for_task738_perspectrum_textual_entailment": 7.9888, + "eval_f1_for_task743_eurlex_title_generation": 18.283, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.2628, + "eval_f1_for_task769_qed_title_generation": 8.2919, + "eval_f1_for_task827_copa_cause_effect_classification": 1.883, + "eval_f1_for_task828_copa_cause_effect_classification": 4.989, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.7565, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.4352, + "eval_f1_for_task890_gwsd_textual_entailment": 1.5128, + "eval_f1_for_task891_gap_coreference_resolution": 4.4227, + "eval_f1_for_task892_gap_coreference_resolution": 1.5096, + "eval_f1_for_task893_gap_coreference_resolution": 2.2537, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 3.3089, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 1.5539, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 3.3796, + "eval_f1_for_task957_e2e_data_to_text": 35.132, + "eval_f1_for_task970_sherliic_textual_entailment": 1.8119, + "eval_f1_for_textual_entailment": 2.7232, + "eval_f1_for_title_generation": 11.5386, + "eval_f1_for_word_analogy": 1.9351, + "eval_gen_len": 80.6202, + "eval_global_step": 1, + "eval_loss": 5.312171936035156, + "eval_rouge1": 12.7631, + "eval_rouge1_for_answerability_classification": 2.633, + "eval_rouge1_for_cause_effect_classification": 12.0797, + "eval_rouge1_for_coreference_resolution": 6.5772, + "eval_rouge1_for_data_to_text": 34.8366, + "eval_rouge1_for_dialogue_act_recognition": 3.8805, + "eval_rouge1_for_grammar_error_correction": 55.3758, + "eval_rouge1_for_keyword_tagging": 12.8669, + "eval_rouge1_for_overlap_extraction": 15.8111, + "eval_rouge1_for_question_rewriting": 42.3521, + "eval_rouge1_for_task020_mctaco_answerability_classification": 3.1529, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 4.264, + "eval_rouge1_for_task034_winogrande_question_rewriting": 42.9185, + "eval_rouge1_for_task035_winogrande_question_rewriting": 33.3799, + "eval_rouge1_for_task036_qasc_keyword_tagging": 32.565, + "eval_rouge1_for_task039_qasc_overlap_extraction": 1.728, + "eval_rouge1_for_task050_multirc_answerability_classification": 1.7352, + "eval_rouge1_for_task102_commongen_data_to_text": 37.0424, + "eval_rouge1_for_task1152_bard_word_analogy": 0.7359, + "eval_rouge1_for_task1153_bard_word_analogy": 0.6648, + "eval_rouge1_for_task1154_bard_word_analogy": 0.3957, + "eval_rouge1_for_task1155_bard_word_analogy": 9.4638, + "eval_rouge1_for_task1156_bard_word_analogy": 3.5375, + "eval_rouge1_for_task1157_bard_word_analogy": 2.4315, + "eval_rouge1_for_task1158_bard_word_analogy": 3.8993, + "eval_rouge1_for_task1159_bard_word_analogy": 1.6096, + "eval_rouge1_for_task1161_coda_19_title_generation": 19.3981, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 64.1494, + "eval_rouge1_for_task121_atomic_question_rewriting": 17.0482, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 2.3319, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 5.3241, + "eval_rouge1_for_task1344_rte_textual_entailment": 1.5696, + "eval_rouge1_for_task1345_qqp_question_rewriting": 27.3177, + "eval_rouge1_for_task1356_xlsum_title_generation": 7.6697, + "eval_rouge1_for_task1358_xlsum_title_generation": 18.1836, + "eval_rouge1_for_task1385_anli_textual_entailment": 6.0254, + "eval_rouge1_for_task1386_anli_textual_entailment": 2.6658, + "eval_rouge1_for_task1387_anli_textual_entailment": 3.0285, + "eval_rouge1_for_task1388_cb_textual_entailment": 5.8699, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 3.5174, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 5.3641, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 1.9204, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.4532, + "eval_rouge1_for_task1407_dart_data_to_text": 20.0985, + "eval_rouge1_for_task1409_dart_data_to_text": 25.865, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.1162, + "eval_rouge1_for_task1439_doqa_answerability_classification": 1.0754, + "eval_rouge1_for_task1442_doqa_answerability_classification": 0.5756, + "eval_rouge1_for_task1516_imppres_textual_entailment": 1.7469, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 2.3383, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 1.1964, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 3.8968, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 3.8884, + "eval_rouge1_for_task1540_peer_read_title_generation": 6.7916, + "eval_rouge1_for_task1554_scitail_textual_entailment": 2.4679, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 74.6354, + "eval_rouge1_for_task1562_zest_question_rewriting": 20.6782, + "eval_rouge1_for_task1586_scifact_title_generation": 17.7639, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.4013, + "eval_rouge1_for_task1612_sick_textual_entailment": 1.6057, + "eval_rouge1_for_task1615_sick_textual_entailment": 6.1309, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 68.809, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 1.4819, + "eval_rouge1_for_task1631_open_pi_data_to_text": 59.55, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 1.2198, + "eval_rouge1_for_task1659_billsum_title_generation": 18.3742, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 8.4355, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 37.08, + "eval_rouge1_for_task190_snli_textual_entailment": 0.748, + "eval_rouge1_for_task199_multinli_textual_entailment": 3.4259, + "eval_rouge1_for_task200_multinli_textual_entailment": 2.3605, + "eval_rouge1_for_task201_multinli_textual_entailment": 1.9549, + "eval_rouge1_for_task202_multinli_textual_entailment": 4.7198, + "eval_rouge1_for_task219_rocstories_title_generation": 7.4906, + "eval_rouge1_for_task220_rocstories_title_generation": 4.8638, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.0274, + "eval_rouge1_for_task232_iirc_answerability_classification": 2.305, + "eval_rouge1_for_task233_iirc_answerability_classification": 2.0195, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 4.1583, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 14.1996, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 29.8943, + "eval_rouge1_for_task288_gigaword_title_generation": 13.761, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 11.7568, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.6973, + "eval_rouge1_for_task329_gap_coreference_resolution": 3.8812, + "eval_rouge1_for_task330_gap_coreference_resolution": 11.9147, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 1.2509, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 6.643, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 8.8093, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 7.8504, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 22.6475, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 6.0821, + "eval_rouge1_for_task402_grailqa_question_rewriting": 26.1572, + "eval_rouge1_for_task418_persent_title_generation": 16.7117, + "eval_rouge1_for_task442_com_qa_question_rewriting": 63.6953, + "eval_rouge1_for_task500_scruples_title_generation": 12.4388, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 16.4876, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 1.4695, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 7.4075, + "eval_rouge1_for_task602_wikitext_title_generation": 5.4534, + "eval_rouge1_for_task613_liar_keyword_tagging": 3.6296, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 35.0651, + "eval_rouge1_for_task619_ohsumed_title_generation": 18.8141, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 3.2937, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 1.5879, + "eval_rouge1_for_task640_e_snli_textual_entailment": 4.275, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 3.5052, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 23.2583, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 14.2114, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.549, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 26.1712, + "eval_rouge1_for_task677_ollie_data_to_text": 28.5201, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 9.011, + "eval_rouge1_for_task743_eurlex_title_generation": 19.46, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.9751, + "eval_rouge1_for_task769_qed_title_generation": 8.3295, + "eval_rouge1_for_task827_copa_cause_effect_classification": 3.3065, + "eval_rouge1_for_task828_copa_cause_effect_classification": 4.9583, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.7563, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 3.3298, + "eval_rouge1_for_task890_gwsd_textual_entailment": 1.5083, + "eval_rouge1_for_task891_gap_coreference_resolution": 4.4022, + "eval_rouge1_for_task892_gap_coreference_resolution": 1.5658, + "eval_rouge1_for_task893_gap_coreference_resolution": 2.2138, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 3.261, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 1.8103, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 3.368, + "eval_rouge1_for_task957_e2e_data_to_text": 36.1193, + "eval_rouge1_for_task970_sherliic_textual_entailment": 1.7403, + "eval_rouge1_for_textual_entailment": 3.1307, + "eval_rouge1_for_title_generation": 12.5477, + "eval_rouge1_for_word_analogy": 2.8423, + "eval_rougeL": 11.6784, + "eval_rougeL_for_answerability_classification": 2.633, + "eval_rougeL_for_cause_effect_classification": 10.2607, + "eval_rougeL_for_coreference_resolution": 6.5653, + "eval_rougeL_for_data_to_text": 28.7281, + "eval_rougeL_for_dialogue_act_recognition": 3.842, + "eval_rougeL_for_grammar_error_correction": 54.7346, + "eval_rougeL_for_keyword_tagging": 12.422, + "eval_rougeL_for_overlap_extraction": 14.8038, + "eval_rougeL_for_question_rewriting": 39.5446, + "eval_rougeL_for_task020_mctaco_answerability_classification": 3.1529, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 4.264, + "eval_rougeL_for_task034_winogrande_question_rewriting": 37.987, + "eval_rougeL_for_task035_winogrande_question_rewriting": 33.065, + "eval_rougeL_for_task036_qasc_keyword_tagging": 30.6445, + "eval_rougeL_for_task039_qasc_overlap_extraction": 1.728, + "eval_rougeL_for_task050_multirc_answerability_classification": 1.7352, + "eval_rougeL_for_task102_commongen_data_to_text": 31.9873, + "eval_rougeL_for_task1152_bard_word_analogy": 0.7359, + "eval_rougeL_for_task1153_bard_word_analogy": 0.6648, + "eval_rougeL_for_task1154_bard_word_analogy": 0.3957, + "eval_rougeL_for_task1155_bard_word_analogy": 9.4638, + "eval_rougeL_for_task1156_bard_word_analogy": 3.5375, + "eval_rougeL_for_task1157_bard_word_analogy": 2.4315, + "eval_rougeL_for_task1158_bard_word_analogy": 3.8993, + "eval_rougeL_for_task1159_bard_word_analogy": 1.6096, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.4735, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 61.5996, + "eval_rougeL_for_task121_atomic_question_rewriting": 16.1698, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 2.3319, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 4.5881, + "eval_rougeL_for_task1344_rte_textual_entailment": 1.5696, + "eval_rougeL_for_task1345_qqp_question_rewriting": 23.8248, + "eval_rougeL_for_task1356_xlsum_title_generation": 6.0439, + "eval_rougeL_for_task1358_xlsum_title_generation": 14.9473, + "eval_rougeL_for_task1385_anli_textual_entailment": 6.0254, + "eval_rougeL_for_task1386_anli_textual_entailment": 2.6658, + "eval_rougeL_for_task1387_anli_textual_entailment": 3.0285, + "eval_rougeL_for_task1388_cb_textual_entailment": 5.8699, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 3.5174, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 5.3641, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 1.9204, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.4085, + "eval_rougeL_for_task1407_dart_data_to_text": 16.4244, + "eval_rougeL_for_task1409_dart_data_to_text": 20.0466, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.7692, + "eval_rougeL_for_task1439_doqa_answerability_classification": 1.0754, + "eval_rougeL_for_task1442_doqa_answerability_classification": 0.5756, + "eval_rougeL_for_task1516_imppres_textual_entailment": 1.7469, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 2.3383, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 1.1964, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 3.8968, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 3.8884, + "eval_rougeL_for_task1540_peer_read_title_generation": 5.6013, + "eval_rougeL_for_task1554_scitail_textual_entailment": 2.4679, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 73.7, + "eval_rougeL_for_task1562_zest_question_rewriting": 18.4435, + "eval_rougeL_for_task1586_scifact_title_generation": 14.161, + "eval_rougeL_for_task1598_nyc_data_to_text": 29.7319, + "eval_rougeL_for_task1612_sick_textual_entailment": 1.6057, + "eval_rougeL_for_task1615_sick_textual_entailment": 6.0815, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 67.7321, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 1.4819, + "eval_rougeL_for_task1631_open_pi_data_to_text": 58.4478, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 1.2198, + "eval_rougeL_for_task1659_billsum_title_generation": 14.269, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 8.4056, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 29.5958, + "eval_rougeL_for_task190_snli_textual_entailment": 0.748, + "eval_rougeL_for_task199_multinli_textual_entailment": 3.4259, + "eval_rougeL_for_task200_multinli_textual_entailment": 2.3605, + "eval_rougeL_for_task201_multinli_textual_entailment": 1.9549, + "eval_rougeL_for_task202_multinli_textual_entailment": 4.7198, + "eval_rougeL_for_task219_rocstories_title_generation": 7.1381, + "eval_rougeL_for_task220_rocstories_title_generation": 4.8638, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.0274, + "eval_rougeL_for_task232_iirc_answerability_classification": 2.305, + "eval_rougeL_for_task233_iirc_answerability_classification": 2.0195, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 4.1583, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 14.1996, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 27.8796, + "eval_rougeL_for_task288_gigaword_title_generation": 12.3266, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 11.7568, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.6973, + "eval_rougeL_for_task329_gap_coreference_resolution": 3.8812, + "eval_rougeL_for_task330_gap_coreference_resolution": 11.9147, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 1.2509, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 6.643, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 8.7223, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 7.8504, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 18.1363, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 6.0821, + "eval_rougeL_for_task402_grailqa_question_rewriting": 22.3264, + "eval_rougeL_for_task418_persent_title_generation": 14.678, + "eval_rougeL_for_task442_com_qa_question_rewriting": 55.3419, + "eval_rougeL_for_task500_scruples_title_generation": 10.8021, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 16.2665, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 1.4695, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 6.6583, + "eval_rougeL_for_task602_wikitext_title_generation": 5.3687, + "eval_rougeL_for_task613_liar_keyword_tagging": 3.6296, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.9302, + "eval_rougeL_for_task619_ohsumed_title_generation": 17.0732, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 2.9895, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 1.5879, + "eval_rougeL_for_task640_e_snli_textual_entailment": 4.275, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 3.5052, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 23.2583, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 14.0745, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.6789, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 24.8219, + "eval_rougeL_for_task677_ollie_data_to_text": 23.7767, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 9.011, + "eval_rougeL_for_task743_eurlex_title_generation": 16.0092, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.0084, + "eval_rougeL_for_task769_qed_title_generation": 8.3295, + "eval_rougeL_for_task827_copa_cause_effect_classification": 3.3065, + "eval_rougeL_for_task828_copa_cause_effect_classification": 4.9583, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.7563, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 3.1048, + "eval_rougeL_for_task890_gwsd_textual_entailment": 1.5083, + "eval_rougeL_for_task891_gap_coreference_resolution": 4.4022, + "eval_rougeL_for_task892_gap_coreference_resolution": 1.5658, + "eval_rougeL_for_task893_gap_coreference_resolution": 2.2138, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 3.261, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 1.8103, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 3.368, + "eval_rougeL_for_task957_e2e_data_to_text": 25.4617, + "eval_rougeL_for_task970_sherliic_textual_entailment": 1.7403, + "eval_rougeL_for_textual_entailment": 3.1287, + "eval_rougeL_for_title_generation": 10.9159, + "eval_rougeL_for_word_analogy": 2.8423, + "eval_runtime": 4553.5655, + "eval_samples_per_second": 2.616, + "eval_steps_per_second": 0.164, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.8445, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 29.3871, + "eval_exact_match_for_answerability_classification": 49.7692, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 37.7143, + "eval_exact_match_for_data_to_text": 7.385, + "eval_exact_match_for_dialogue_act_recognition": 50.1429, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 32.2, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 1.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 15.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 23.0, + "eval_exact_match_for_task1153_bard_word_analogy": 25.0, + "eval_exact_match_for_task1154_bard_word_analogy": 37.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 47.0, + "eval_exact_match_for_task1157_bard_word_analogy": 48.0, + "eval_exact_match_for_task1158_bard_word_analogy": 43.0, + "eval_exact_match_for_task1159_bard_word_analogy": 21.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 38.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 38.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 73.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 42.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 52.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 54.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 12.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 57.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 20.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 3.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 48.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 14.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.75, + "eval_exact_match_for_title_generation": 9.3049, + "eval_exact_match_for_word_analogy": 37.0, + "eval_f1": 46.6413, + "eval_f1_for_answerability_classification": 52.3333, + "eval_f1_for_cause_effect_classification": 55.3757, + "eval_f1_for_coreference_resolution": 47.8839, + "eval_f1_for_data_to_text": 53.5873, + "eval_f1_for_dialogue_act_recognition": 53.7143, + "eval_f1_for_grammar_error_correction": 57.0476, + "eval_f1_for_keyword_tagging": 46.5815, + "eval_f1_for_overlap_extraction": 39.3104, + "eval_f1_for_question_rewriting": 65.9104, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 45.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 48.392, + "eval_f1_for_task035_winogrande_question_rewriting": 84.0313, + "eval_f1_for_task036_qasc_keyword_tagging": 57.6789, + "eval_f1_for_task039_qasc_overlap_extraction": 30.6667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 55.076, + "eval_f1_for_task1152_bard_word_analogy": 23.0, + "eval_f1_for_task1153_bard_word_analogy": 25.0, + "eval_f1_for_task1154_bard_word_analogy": 37.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 47.0, + "eval_f1_for_task1157_bard_word_analogy": 48.0, + "eval_f1_for_task1158_bard_word_analogy": 43.0, + "eval_f1_for_task1159_bard_word_analogy": 21.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.4645, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0723, + "eval_f1_for_task121_atomic_question_rewriting": 50.6601, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 9.7115, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.6424, + "eval_f1_for_task1356_xlsum_title_generation": 13.0259, + "eval_f1_for_task1358_xlsum_title_generation": 32.632, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 38.0, + "eval_f1_for_task1388_cb_textual_entailment": 38.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 73.0, + "eval_f1_for_task1407_dart_data_to_text": 40.1564, + "eval_f1_for_task1409_dart_data_to_text": 54.7347, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8803, + "eval_f1_for_task1439_doqa_answerability_classification": 42.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 52.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 24.1621, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.215, + "eval_f1_for_task1562_zest_question_rewriting": 52.3887, + "eval_f1_for_task1586_scifact_title_generation": 29.7924, + "eval_f1_for_task1598_nyc_data_to_text": 48.958, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.6699, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.397, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 45.5407, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.0, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.0099, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 23.7238, + "eval_f1_for_task220_rocstories_title_generation": 57.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 52.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.5381, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 47.9541, + "eval_f1_for_task288_gigaword_title_generation": 30.0255, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 51.6381, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.0905, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.1808, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_f1_for_task402_grailqa_question_rewriting": 82.1802, + "eval_f1_for_task418_persent_title_generation": 26.085, + "eval_f1_for_task442_com_qa_question_rewriting": 69.7005, + "eval_f1_for_task500_scruples_title_generation": 15.9571, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.5888, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 44.4625, + "eval_f1_for_task602_wikitext_title_generation": 12.6316, + "eval_f1_for_task613_liar_keyword_tagging": 21.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 39.7823, + "eval_f1_for_task619_ohsumed_title_generation": 39.6116, + "eval_f1_for_task620_ohsumed_keyword_tagging": 39.6857, + "eval_f1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_f1_for_task640_e_snli_textual_entailment": 32.0, + "eval_f1_for_task641_e_snli_textual_entailment": 29.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 57.5429, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 39.3082, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.376, + "eval_f1_for_task671_ambigqa_question_rewriting": 66.9012, + "eval_f1_for_task677_ollie_data_to_text": 35.5901, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 31.3864, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.5622, + "eval_f1_for_task769_qed_title_generation": 69.5921, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 25.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 67.1333, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 30.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 52.0025, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.75, + "eval_f1_for_title_generation": 32.1397, + "eval_f1_for_word_analogy": 37.0, + "eval_gen_len": 10.0688, + "eval_global_step": 50, + "eval_loss": 1.0948777198791504, + "eval_rouge1": 48.4037, + "eval_rouge1_for_answerability_classification": 52.3333, + "eval_rouge1_for_cause_effect_classification": 56.3481, + "eval_rouge1_for_coreference_resolution": 48.6782, + "eval_rouge1_for_data_to_text": 56.1729, + "eval_rouge1_for_dialogue_act_recognition": 56.8667, + "eval_rouge1_for_grammar_error_correction": 62.244, + "eval_rouge1_for_keyword_tagging": 51.935, + "eval_rouge1_for_overlap_extraction": 44.1528, + "eval_rouge1_for_question_rewriting": 67.5493, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 48.4646, + "eval_rouge1_for_task035_winogrande_question_rewriting": 84.8356, + "eval_rouge1_for_task036_qasc_keyword_tagging": 64.699, + "eval_rouge1_for_task039_qasc_overlap_extraction": 39.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.9196, + "eval_rouge1_for_task1152_bard_word_analogy": 23.0, + "eval_rouge1_for_task1153_bard_word_analogy": 25.0, + "eval_rouge1_for_task1154_bard_word_analogy": 37.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 47.0, + "eval_rouge1_for_task1157_bard_word_analogy": 48.0, + "eval_rouge1_for_task1158_bard_word_analogy": 43.0, + "eval_rouge1_for_task1159_bard_word_analogy": 21.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.7989, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.6046, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.0816, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.0128, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.3942, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.9361, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.463, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 38.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 76.4, + "eval_rouge1_for_task1407_dart_data_to_text": 40.4784, + "eval_rouge1_for_task1409_dart_data_to_text": 55.6346, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.0658, + "eval_rouge1_for_task1439_doqa_answerability_classification": 42.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 52.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 26.7335, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4222, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.0558, + "eval_rouge1_for_task1586_scifact_title_generation": 33.1673, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.4805, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.0096, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.462, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 46.818, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.0, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.5477, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 30.9238, + "eval_rouge1_for_task220_rocstories_title_generation": 57.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.3833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 48.639, + "eval_rouge1_for_task288_gigaword_title_generation": 32.7768, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 52.4381, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.019, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.9089, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 32.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.2615, + "eval_rouge1_for_task418_persent_title_generation": 29.7346, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.1971, + "eval_rouge1_for_task500_scruples_title_generation": 17.212, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.9487, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 45.6651, + "eval_rouge1_for_task602_wikitext_title_generation": 13.0697, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 45.8608, + "eval_rouge1_for_task619_ohsumed_title_generation": 42.9355, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.2, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 57.4429, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 39.1877, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.4309, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.7072, + "eval_rouge1_for_task677_ollie_data_to_text": 38.0452, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.7874, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.1214, + "eval_rouge1_for_task769_qed_title_generation": 69.8238, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 67.1333, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.5689, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.5556, + "eval_rouge1_for_title_generation": 34.4011, + "eval_rouge1_for_word_analogy": 37.0, + "eval_rougeL": 47.0802, + "eval_rougeL_for_answerability_classification": 52.3333, + "eval_rougeL_for_cause_effect_classification": 55.6306, + "eval_rougeL_for_coreference_resolution": 48.6782, + "eval_rougeL_for_data_to_text": 49.22, + "eval_rougeL_for_dialogue_act_recognition": 56.8667, + "eval_rougeL_for_grammar_error_correction": 61.5836, + "eval_rougeL_for_keyword_tagging": 51.3878, + "eval_rougeL_for_overlap_extraction": 43.3294, + "eval_rougeL_for_question_rewriting": 63.7155, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 48.4646, + "eval_rougeL_for_task035_winogrande_question_rewriting": 83.7207, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.8629, + "eval_rougeL_for_task039_qasc_overlap_extraction": 39.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 60.5052, + "eval_rougeL_for_task1152_bard_word_analogy": 23.0, + "eval_rougeL_for_task1153_bard_word_analogy": 25.0, + "eval_rougeL_for_task1154_bard_word_analogy": 37.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 47.0, + "eval_rougeL_for_task1157_bard_word_analogy": 48.0, + "eval_rougeL_for_task1158_bard_word_analogy": 43.0, + "eval_rougeL_for_task1159_bard_word_analogy": 21.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.7989, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1052, + "eval_rougeL_for_task121_atomic_question_rewriting": 47.137, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.891, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.4948, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.2522, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.4195, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 38.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 76.4, + "eval_rougeL_for_task1407_dart_data_to_text": 36.5271, + "eval_rougeL_for_task1409_dart_data_to_text": 46.8289, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.6611, + "eval_rougeL_for_task1439_doqa_answerability_classification": 42.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 52.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 25.8444, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.506, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.6503, + "eval_rougeL_for_task1586_scifact_title_generation": 27.4367, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.127, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.4203, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.2135, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 42.695, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.0, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.3178, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 30.9238, + "eval_rougeL_for_task220_rocstories_title_generation": 57.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.3833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 46.9922, + "eval_rougeL_for_task288_gigaword_title_generation": 28.331, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 52.4381, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.019, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.0671, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 32.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.7813, + "eval_rougeL_for_task418_persent_title_generation": 26.7773, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.5283, + "eval_rougeL_for_task500_scruples_title_generation": 16.0097, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.0416, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.7102, + "eval_rougeL_for_task602_wikitext_title_generation": 13.0173, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 41.6803, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.9025, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.3, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 57.4429, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 39.1877, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.6064, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 66.9618, + "eval_rougeL_for_task677_ollie_data_to_text": 31.3001, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.6292, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.0938, + "eval_rougeL_for_task769_qed_title_generation": 69.8238, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 67.1333, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.1529, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.5556, + "eval_rougeL_for_title_generation": 31.7501, + "eval_rougeL_for_word_analogy": 37.0, + "eval_runtime": 1096.57, + "eval_samples_per_second": 10.861, + "eval_steps_per_second": 0.679, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 1.3119, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 28.8833, + "eval_exact_match_for_answerability_classification": 50.9231, + "eval_exact_match_for_cause_effect_classification": 37.4286, + "eval_exact_match_for_coreference_resolution": 37.7857, + "eval_exact_match_for_data_to_text": 7.5061, + "eval_exact_match_for_dialogue_act_recognition": 48.5714, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 37.0, + "eval_exact_match_for_overlap_extraction": 13.5, + "eval_exact_match_for_question_rewriting": 1.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 19.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 27.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 20.0, + "eval_exact_match_for_task1153_bard_word_analogy": 19.0, + "eval_exact_match_for_task1154_bard_word_analogy": 21.0, + "eval_exact_match_for_task1155_bard_word_analogy": 60.0, + "eval_exact_match_for_task1156_bard_word_analogy": 47.0, + "eval_exact_match_for_task1157_bard_word_analogy": 37.0, + "eval_exact_match_for_task1158_bard_word_analogy": 41.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 22.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_exact_match_for_task1407_dart_data_to_text": 1.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 42.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 57.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 4.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 24.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 11.0, + "eval_exact_match_for_task220_rocstories_title_generation": 73.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 54.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 45.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 15.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 49.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 54.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 46.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.0417, + "eval_exact_match_for_title_generation": 9.1368, + "eval_exact_match_for_word_analogy": 32.625, + "eval_f1": 46.0665, + "eval_f1_for_answerability_classification": 53.4872, + "eval_f1_for_cause_effect_classification": 55.7078, + "eval_f1_for_coreference_resolution": 47.467, + "eval_f1_for_data_to_text": 53.7752, + "eval_f1_for_dialogue_act_recognition": 52.3571, + "eval_f1_for_grammar_error_correction": 56.5565, + "eval_f1_for_keyword_tagging": 50.6738, + "eval_f1_for_overlap_extraction": 43.1064, + "eval_f1_for_question_rewriting": 67.0275, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task034_winogrande_question_rewriting": 56.9742, + "eval_f1_for_task035_winogrande_question_rewriting": 86.3941, + "eval_f1_for_task036_qasc_keyword_tagging": 59.4357, + "eval_f1_for_task039_qasc_overlap_extraction": 35.1667, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 54.2376, + "eval_f1_for_task1152_bard_word_analogy": 20.0, + "eval_f1_for_task1153_bard_word_analogy": 19.0, + "eval_f1_for_task1154_bard_word_analogy": 21.0, + "eval_f1_for_task1155_bard_word_analogy": 60.0, + "eval_f1_for_task1156_bard_word_analogy": 47.6667, + "eval_f1_for_task1157_bard_word_analogy": 37.0, + "eval_f1_for_task1158_bard_word_analogy": 41.0, + "eval_f1_for_task1159_bard_word_analogy": 16.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.8796, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.3522, + "eval_f1_for_task121_atomic_question_rewriting": 51.1066, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.0514, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.32, + "eval_f1_for_task1356_xlsum_title_generation": 14.6223, + "eval_f1_for_task1358_xlsum_title_generation": 31.1398, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 22.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_f1_for_task1407_dart_data_to_text": 38.6241, + "eval_f1_for_task1409_dart_data_to_text": 53.4475, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.4908, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.8016, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.6222, + "eval_f1_for_task1562_zest_question_rewriting": 48.3508, + "eval_f1_for_task1586_scifact_title_generation": 25.8597, + "eval_f1_for_task1598_nyc_data_to_text": 50.1169, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 42.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.8111, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.987, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 38.4063, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.1905, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.5381, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 24.5976, + "eval_f1_for_task220_rocstories_title_generation": 73.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 54.0, + "eval_f1_for_task233_iirc_answerability_classification": 52.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.2, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 51.0461, + "eval_f1_for_task288_gigaword_title_generation": 27.9, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 64.7333, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 63.5905, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.1247, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_f1_for_task402_grailqa_question_rewriting": 81.9259, + "eval_f1_for_task418_persent_title_generation": 22.3315, + "eval_f1_for_task442_com_qa_question_rewriting": 70.8651, + "eval_f1_for_task500_scruples_title_generation": 17.1448, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.5352, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.2485, + "eval_f1_for_task602_wikitext_title_generation": 12.4742, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 37.1631, + "eval_f1_for_task619_ohsumed_title_generation": 37.8432, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.9333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_f1_for_task640_e_snli_textual_entailment": 35.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 55.6667, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 8.7374, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.4051, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.7975, + "eval_f1_for_task677_ollie_data_to_text": 35.402, + "eval_f1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_f1_for_task743_eurlex_title_generation": 30.6379, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.495, + "eval_f1_for_task769_qed_title_generation": 58.8667, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_f1_for_task890_gwsd_textual_entailment": 40.0, + "eval_f1_for_task891_gap_coreference_resolution": 65.0857, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 55.6611, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.0417, + "eval_f1_for_title_generation": 31.4655, + "eval_f1_for_word_analogy": 32.7083, + "eval_gen_len": 9.9669, + "eval_global_step": 100, + "eval_loss": 1.0599020719528198, + "eval_rouge1": 47.6007, + "eval_rouge1_for_answerability_classification": 53.4872, + "eval_rouge1_for_cause_effect_classification": 56.802, + "eval_rouge1_for_coreference_resolution": 48.0727, + "eval_rouge1_for_data_to_text": 56.52, + "eval_rouge1_for_dialogue_act_recognition": 54.2619, + "eval_rouge1_for_grammar_error_correction": 61.7474, + "eval_rouge1_for_keyword_tagging": 54.6277, + "eval_rouge1_for_overlap_extraction": 46.4341, + "eval_rouge1_for_question_rewriting": 68.5543, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 54.5, + "eval_rouge1_for_task034_winogrande_question_rewriting": 57.0734, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.984, + "eval_rouge1_for_task036_qasc_keyword_tagging": 64.572, + "eval_rouge1_for_task039_qasc_overlap_extraction": 41.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.4905, + "eval_rouge1_for_task1152_bard_word_analogy": 20.0, + "eval_rouge1_for_task1153_bard_word_analogy": 19.0, + "eval_rouge1_for_task1154_bard_word_analogy": 21.0, + "eval_rouge1_for_task1155_bard_word_analogy": 60.0, + "eval_rouge1_for_task1156_bard_word_analogy": 47.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 37.0, + "eval_rouge1_for_task1158_bard_word_analogy": 41.0, + "eval_rouge1_for_task1159_bard_word_analogy": 16.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.7572, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.4934, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.2236, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.5619, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.9052, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.2446, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.9937, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 22.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_rouge1_for_task1407_dart_data_to_text": 38.9436, + "eval_rouge1_for_task1409_dart_data_to_text": 54.262, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.6744, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 25.9982, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.8204, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.4149, + "eval_rouge1_for_task1586_scifact_title_generation": 28.7545, + "eval_rouge1_for_task1598_nyc_data_to_text": 50.6476, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 80.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.1171, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.0672, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.787, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.1905, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.8292, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 29.6738, + "eval_rouge1_for_task220_rocstories_title_generation": 73.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 54.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 58.8667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 51.8683, + "eval_rouge1_for_task288_gigaword_title_generation": 30.5183, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 65.1333, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 63.519, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.1832, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.1709, + "eval_rouge1_for_task418_persent_title_generation": 25.6392, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.1175, + "eval_rouge1_for_task500_scruples_title_generation": 18.7672, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.4739, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.2795, + "eval_rouge1_for_task602_wikitext_title_generation": 13.0036, + "eval_rouge1_for_task613_liar_keyword_tagging": 33.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 43.7641, + "eval_rouge1_for_task619_ohsumed_title_generation": 40.518, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.8333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 56.0667, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 8.7222, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.2227, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.3747, + "eval_rouge1_for_task677_ollie_data_to_text": 37.9253, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.0019, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.8721, + "eval_rouge1_for_task769_qed_title_generation": 58.7603, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 46.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 65.0857, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 58.4227, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 39.6528, + "eval_rouge1_for_title_generation": 33.6128, + "eval_rouge1_for_word_analogy": 32.7083, + "eval_rougeL": 46.2385, + "eval_rougeL_for_answerability_classification": 53.4872, + "eval_rougeL_for_cause_effect_classification": 55.8632, + "eval_rougeL_for_coreference_resolution": 48.0727, + "eval_rougeL_for_data_to_text": 48.9183, + "eval_rougeL_for_dialogue_act_recognition": 54.2619, + "eval_rougeL_for_grammar_error_correction": 60.9748, + "eval_rougeL_for_keyword_tagging": 53.9618, + "eval_rougeL_for_overlap_extraction": 45.0312, + "eval_rougeL_for_question_rewriting": 64.8616, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 54.5, + "eval_rougeL_for_task034_winogrande_question_rewriting": 56.9306, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.7707, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.2424, + "eval_rougeL_for_task039_qasc_overlap_extraction": 41.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.705, + "eval_rougeL_for_task1152_bard_word_analogy": 20.0, + "eval_rougeL_for_task1153_bard_word_analogy": 19.0, + "eval_rougeL_for_task1154_bard_word_analogy": 21.0, + "eval_rougeL_for_task1155_bard_word_analogy": 60.0, + "eval_rougeL_for_task1156_bard_word_analogy": 47.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 37.0, + "eval_rougeL_for_task1158_bard_word_analogy": 41.0, + "eval_rougeL_for_task1159_bard_word_analogy": 16.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.3581, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1027, + "eval_rougeL_for_task121_atomic_question_rewriting": 47.0488, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.0195, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.0816, + "eval_rougeL_for_task1356_xlsum_title_generation": 15.2343, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.6885, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 22.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_rougeL_for_task1407_dart_data_to_text": 35.426, + "eval_rougeL_for_task1409_dart_data_to_text": 45.5476, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.0317, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 24.736, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9179, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.8294, + "eval_rougeL_for_task1586_scifact_title_generation": 23.0405, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.2187, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 80.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.8944, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 93.8164, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 35.7676, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.1905, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.6511, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 29.6738, + "eval_rougeL_for_task220_rocstories_title_generation": 73.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 54.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 58.8667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 49.0625, + "eval_rougeL_for_task288_gigaword_title_generation": 26.6351, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 65.1333, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 63.519, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.8034, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.7604, + "eval_rougeL_for_task418_persent_title_generation": 22.559, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.6012, + "eval_rougeL_for_task500_scruples_title_generation": 17.6825, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.9375, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.5443, + "eval_rougeL_for_task602_wikitext_title_generation": 12.9513, + "eval_rougeL_for_task613_liar_keyword_tagging": 33.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 38.5724, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.6562, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.8333, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 56.0667, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 8.7222, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.9369, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.5209, + "eval_rougeL_for_task677_ollie_data_to_text": 31.2755, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.9138, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.2396, + "eval_rougeL_for_task769_qed_title_generation": 58.7603, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 46.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 65.0857, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.3221, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 39.6528, + "eval_rougeL_for_title_generation": 31.1147, + "eval_rougeL_for_word_analogy": 32.7083, + "eval_runtime": 1096.0024, + "eval_samples_per_second": 10.867, + "eval_steps_per_second": 0.68, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 1.2953, + "step": 200 + }, + { + "epoch": 0.04, + "eval_exact_match": 30.0924, + "eval_exact_match_for_answerability_classification": 50.1538, + "eval_exact_match_for_cause_effect_classification": 37.8571, + "eval_exact_match_for_coreference_resolution": 38.2857, + "eval_exact_match_for_data_to_text": 8.2324, + "eval_exact_match_for_dialogue_act_recognition": 48.5714, + "eval_exact_match_for_grammar_error_correction": 9.0, + "eval_exact_match_for_keyword_tagging": 35.4, + "eval_exact_match_for_overlap_extraction": 8.0, + "eval_exact_match_for_question_rewriting": 1.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 23.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 16.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 23.0, + "eval_exact_match_for_task1153_bard_word_analogy": 24.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 65.0, + "eval_exact_match_for_task1156_bard_word_analogy": 61.0, + "eval_exact_match_for_task1157_bard_word_analogy": 46.0, + "eval_exact_match_for_task1158_bard_word_analogy": 57.0, + "eval_exact_match_for_task1159_bard_word_analogy": 30.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 18.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 64.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 20.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 25.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 7.0, + "eval_exact_match_for_task220_rocstories_title_generation": 63.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 57.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 56.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 15.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 59.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 9.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 69.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 3.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 71.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 55.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 41.5833, + "eval_exact_match_for_title_generation": 9.2489, + "eval_exact_match_for_word_analogy": 41.125, + "eval_f1": 46.6422, + "eval_f1_for_answerability_classification": 52.7179, + "eval_f1_for_cause_effect_classification": 55.4162, + "eval_f1_for_coreference_resolution": 45.5569, + "eval_f1_for_data_to_text": 53.4827, + "eval_f1_for_dialogue_act_recognition": 53.1429, + "eval_f1_for_grammar_error_correction": 56.9722, + "eval_f1_for_keyword_tagging": 49.4741, + "eval_f1_for_overlap_extraction": 29.9772, + "eval_f1_for_question_rewriting": 65.4632, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 47.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 30.795, + "eval_f1_for_task035_winogrande_question_rewriting": 87.8277, + "eval_f1_for_task036_qasc_keyword_tagging": 61.3197, + "eval_f1_for_task039_qasc_overlap_extraction": 27.5667, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 49.4997, + "eval_f1_for_task1152_bard_word_analogy": 23.0, + "eval_f1_for_task1153_bard_word_analogy": 24.0, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 65.0, + "eval_f1_for_task1156_bard_word_analogy": 61.0, + "eval_f1_for_task1157_bard_word_analogy": 46.0, + "eval_f1_for_task1158_bard_word_analogy": 57.0, + "eval_f1_for_task1159_bard_word_analogy": 30.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.4042, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1629, + "eval_f1_for_task121_atomic_question_rewriting": 50.2182, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.8443, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8603, + "eval_f1_for_task1356_xlsum_title_generation": 16.3676, + "eval_f1_for_task1358_xlsum_title_generation": 30.2587, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 37.0592, + "eval_f1_for_task1409_dart_data_to_text": 52.4759, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1358, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.2358, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8086, + "eval_f1_for_task1562_zest_question_rewriting": 55.6858, + "eval_f1_for_task1586_scifact_title_generation": 29.9719, + "eval_f1_for_task1598_nyc_data_to_text": 52.316, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 39.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.1534, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.3513, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 42.5066, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.4762, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.3521, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 36.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 23.3334, + "eval_f1_for_task220_rocstories_title_generation": 63.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_f1_for_task232_iirc_answerability_classification": 47.0, + "eval_f1_for_task233_iirc_answerability_classification": 48.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 57.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.9833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.3877, + "eval_f1_for_task288_gigaword_title_generation": 27.2642, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 37.0, + "eval_f1_for_task329_gap_coreference_resolution": 36.0, + "eval_f1_for_task330_gap_coreference_resolution": 66.4943, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 68.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.0644, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 82.5713, + "eval_f1_for_task418_persent_title_generation": 21.4917, + "eval_f1_for_task442_com_qa_question_rewriting": 70.092, + "eval_f1_for_task500_scruples_title_generation": 17.1648, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.4944, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.6612, + "eval_f1_for_task602_wikitext_title_generation": 11.6792, + "eval_f1_for_task613_liar_keyword_tagging": 18.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 33.1822, + "eval_f1_for_task619_ohsumed_title_generation": 34.8821, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.0366, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 33.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 77.3476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 3.3761, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.7952, + "eval_f1_for_task671_ambigqa_question_rewriting": 64.9338, + "eval_f1_for_task677_ollie_data_to_text": 35.7, + "eval_f1_for_task738_perspectrum_textual_entailment": 71.0, + "eval_f1_for_task743_eurlex_title_generation": 31.2492, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.074, + "eval_f1_for_task769_qed_title_generation": 66.9709, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.0, + "eval_f1_for_task890_gwsd_textual_entailment": 44.0, + "eval_f1_for_task891_gap_coreference_resolution": 65.6333, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 32.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task957_e2e_data_to_text": 57.4335, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 41.5833, + "eval_f1_for_title_generation": 31.4973, + "eval_f1_for_word_analogy": 41.125, + "eval_gen_len": 10.7614, + "eval_global_step": 200, + "eval_loss": 1.0654383897781372, + "eval_rouge1": 48.2257, + "eval_rouge1_for_answerability_classification": 52.7179, + "eval_rouge1_for_cause_effect_classification": 55.9332, + "eval_rouge1_for_coreference_resolution": 45.8978, + "eval_rouge1_for_data_to_text": 56.3959, + "eval_rouge1_for_dialogue_act_recognition": 55.2238, + "eval_rouge1_for_grammar_error_correction": 61.8562, + "eval_rouge1_for_keyword_tagging": 54.3504, + "eval_rouge1_for_overlap_extraction": 34.7968, + "eval_rouge1_for_question_rewriting": 67.039, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 48.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 30.7573, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.6491, + "eval_rouge1_for_task036_qasc_keyword_tagging": 65.9013, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.0667, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.5465, + "eval_rouge1_for_task1152_bard_word_analogy": 23.0, + "eval_rouge1_for_task1153_bard_word_analogy": 24.0, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 65.0, + "eval_rouge1_for_task1156_bard_word_analogy": 61.0, + "eval_rouge1_for_task1157_bard_word_analogy": 46.0, + "eval_rouge1_for_task1158_bard_word_analogy": 57.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.9543, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.6799, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.5123, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.2473, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.3395, + "eval_rouge1_for_task1356_xlsum_title_generation": 19.1842, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.8634, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 71.9, + "eval_rouge1_for_task1407_dart_data_to_text": 38.3146, + "eval_rouge1_for_task1409_dart_data_to_text": 53.3246, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.7764, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.8093, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.9361, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.4797, + "eval_rouge1_for_task1586_scifact_title_generation": 33.204, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.4926, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.4372, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.3204, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 44.4152, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.4762, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.6769, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 28.6334, + "eval_rouge1_for_task220_rocstories_title_generation": 63.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 57.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.6024, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.527, + "eval_rouge1_for_task288_gigaword_title_generation": 29.668, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 37.5667, + "eval_rouge1_for_task329_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 66.4494, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 68.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.3466, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.8839, + "eval_rouge1_for_task418_persent_title_generation": 24.2101, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.6804, + "eval_rouge1_for_task500_scruples_title_generation": 19.3916, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.0025, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.1722, + "eval_rouge1_for_task602_wikitext_title_generation": 12.2663, + "eval_rouge1_for_task613_liar_keyword_tagging": 33.4, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.519, + "eval_rouge1_for_task619_ohsumed_title_generation": 38.1194, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.6033, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 77.8476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.4889, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.4964, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.5134, + "eval_rouge1_for_task677_ollie_data_to_text": 38.1281, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 71.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.8142, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.3267, + "eval_rouge1_for_task769_qed_title_generation": 67.6999, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 65.5857, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task957_e2e_data_to_text": 59.3817, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 43.2778, + "eval_rouge1_for_title_generation": 33.7272, + "eval_rouge1_for_word_analogy": 41.125, + "eval_rougeL": 46.8146, + "eval_rougeL_for_answerability_classification": 52.7179, + "eval_rougeL_for_cause_effect_classification": 55.0222, + "eval_rougeL_for_coreference_resolution": 45.8978, + "eval_rougeL_for_data_to_text": 48.705, + "eval_rougeL_for_dialogue_act_recognition": 55.2238, + "eval_rougeL_for_grammar_error_correction": 61.1561, + "eval_rougeL_for_keyword_tagging": 53.7281, + "eval_rougeL_for_overlap_extraction": 34.2594, + "eval_rougeL_for_question_rewriting": 63.2535, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 48.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 30.6883, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.5917, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.1227, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.0667, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.54, + "eval_rougeL_for_task1152_bard_word_analogy": 23.0, + "eval_rougeL_for_task1153_bard_word_analogy": 24.0, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 65.0, + "eval_rougeL_for_task1156_bard_word_analogy": 61.0, + "eval_rougeL_for_task1157_bard_word_analogy": 46.0, + "eval_rougeL_for_task1158_bard_word_analogy": 57.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.7949, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1161, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.6727, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 11.5403, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.3162, + "eval_rougeL_for_task1356_xlsum_title_generation": 16.269, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.1819, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 71.9, + "eval_rougeL_for_task1407_dart_data_to_text": 34.2281, + "eval_rougeL_for_task1409_dart_data_to_text": 44.7108, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.2787, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 23.1518, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0336, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.8398, + "eval_rougeL_for_task1586_scifact_title_generation": 26.8818, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.3461, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.1404, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.9427, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 40.5204, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.4762, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.9761, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 28.6334, + "eval_rougeL_for_task220_rocstories_title_generation": 63.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 45.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 57.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.6024, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.452, + "eval_rougeL_for_task288_gigaword_title_generation": 25.4623, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 37.5667, + "eval_rougeL_for_task329_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 66.4494, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 68.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 86.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.8302, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.3758, + "eval_rougeL_for_task418_persent_title_generation": 20.6048, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.6098, + "eval_rougeL_for_task500_scruples_title_generation": 18.0983, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.6264, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.8666, + "eval_rougeL_for_task602_wikitext_title_generation": 12.1127, + "eval_rougeL_for_task613_liar_keyword_tagging": 33.4, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 31.6588, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.4672, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.27, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 77.8476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.4889, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.5507, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.8873, + "eval_rougeL_for_task677_ollie_data_to_text": 31.7539, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 71.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.7863, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.34, + "eval_rougeL_for_task769_qed_title_generation": 67.6999, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 65.5857, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.4168, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 43.2778, + "eval_rougeL_for_title_generation": 30.8722, + "eval_rougeL_for_word_analogy": 41.125, + "eval_runtime": 1190.8118, + "eval_samples_per_second": 10.002, + "eval_steps_per_second": 0.626, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.2208, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 31.5617, + "eval_exact_match_for_answerability_classification": 51.2308, + "eval_exact_match_for_cause_effect_classification": 38.8571, + "eval_exact_match_for_coreference_resolution": 37.8571, + "eval_exact_match_for_data_to_text": 8.2324, + "eval_exact_match_for_dialogue_act_recognition": 48.4286, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 43.6, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 2.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 17.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 62.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 24.0, + "eval_exact_match_for_task1153_bard_word_analogy": 30.0, + "eval_exact_match_for_task1154_bard_word_analogy": 27.0, + "eval_exact_match_for_task1155_bard_word_analogy": 59.0, + "eval_exact_match_for_task1156_bard_word_analogy": 61.0, + "eval_exact_match_for_task1157_bard_word_analogy": 61.0, + "eval_exact_match_for_task1158_bard_word_analogy": 58.0, + "eval_exact_match_for_task1159_bard_word_analogy": 33.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 25.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 31.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 64.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 62.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 63.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 16.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 28.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 49.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 75.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 54.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 56.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 46.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 9.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 64.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 28.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 2.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 78.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 67.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 55.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 47.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 57.0, + "eval_exact_match_for_textual_entailment": 44.2083, + "eval_exact_match_for_title_generation": 10.8744, + "eval_exact_match_for_word_analogy": 44.125, + "eval_f1": 49.5643, + "eval_f1_for_answerability_classification": 54.0, + "eval_f1_for_cause_effect_classification": 57.2188, + "eval_f1_for_coreference_resolution": 46.3112, + "eval_f1_for_data_to_text": 55.1684, + "eval_f1_for_dialogue_act_recognition": 52.7857, + "eval_f1_for_grammar_error_correction": 56.8491, + "eval_f1_for_keyword_tagging": 58.2216, + "eval_f1_for_overlap_extraction": 37.2151, + "eval_f1_for_question_rewriting": 71.1558, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task034_winogrande_question_rewriting": 89.1695, + "eval_f1_for_task035_winogrande_question_rewriting": 88.0497, + "eval_f1_for_task036_qasc_keyword_tagging": 59.0651, + "eval_f1_for_task039_qasc_overlap_extraction": 29.8333, + "eval_f1_for_task050_multirc_answerability_classification": 62.0, + "eval_f1_for_task102_commongen_data_to_text": 53.6051, + "eval_f1_for_task1152_bard_word_analogy": 24.0, + "eval_f1_for_task1153_bard_word_analogy": 30.0, + "eval_f1_for_task1154_bard_word_analogy": 27.0, + "eval_f1_for_task1155_bard_word_analogy": 59.0, + "eval_f1_for_task1156_bard_word_analogy": 67.0, + "eval_f1_for_task1157_bard_word_analogy": 61.0, + "eval_f1_for_task1158_bard_word_analogy": 58.0, + "eval_f1_for_task1159_bard_word_analogy": 35.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.5824, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.9764, + "eval_f1_for_task121_atomic_question_rewriting": 49.7208, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2668, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.0526, + "eval_f1_for_task1356_xlsum_title_generation": 19.3455, + "eval_f1_for_task1358_xlsum_title_generation": 33.0963, + "eval_f1_for_task1385_anli_textual_entailment": 25.0, + "eval_f1_for_task1386_anli_textual_entailment": 28.0, + "eval_f1_for_task1387_anli_textual_entailment": 37.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 58.0, + "eval_f1_for_task1407_dart_data_to_text": 40.8751, + "eval_f1_for_task1409_dart_data_to_text": 54.9887, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.8261, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 48.0, + "eval_f1_for_task1516_imppres_textual_entailment": 31.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 64.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 33.4346, + "eval_f1_for_task1554_scitail_textual_entailment": 62.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.8721, + "eval_f1_for_task1562_zest_question_rewriting": 51.1931, + "eval_f1_for_task1586_scifact_title_generation": 35.7348, + "eval_f1_for_task1598_nyc_data_to_text": 53.5063, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.23, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 96.2512, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 45.3937, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 72.9595, + "eval_f1_for_task1728_web_nlg_data_to_text": 63.1966, + "eval_f1_for_task190_snli_textual_entailment": 49.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 28.5619, + "eval_f1_for_task220_rocstories_title_generation": 75.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_f1_for_task232_iirc_answerability_classification": 54.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.65, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.5969, + "eval_f1_for_task288_gigaword_title_generation": 28.9373, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 23.5667, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.3048, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 69.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 88.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.5949, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 82.1798, + "eval_f1_for_task418_persent_title_generation": 27.332, + "eval_f1_for_task442_com_qa_question_rewriting": 72.2754, + "eval_f1_for_task500_scruples_title_generation": 19.4556, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.5471, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.1159, + "eval_f1_for_task602_wikitext_title_generation": 12.2795, + "eval_f1_for_task613_liar_keyword_tagging": 23.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 43.2701, + "eval_f1_for_task619_ohsumed_title_generation": 40.5302, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.1524, + "eval_f1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 37.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 85.5571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 31.1762, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.6419, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.225, + "eval_f1_for_task677_ollie_data_to_text": 35.6365, + "eval_f1_for_task738_perspectrum_textual_entailment": 67.0, + "eval_f1_for_task743_eurlex_title_generation": 34.186, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.2304, + "eval_f1_for_task769_qed_title_generation": 78.242, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 55.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_f1_for_task890_gwsd_textual_entailment": 47.0, + "eval_f1_for_task891_gap_coreference_resolution": 60.5333, + "eval_f1_for_task892_gap_coreference_resolution": 41.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 56.0115, + "eval_f1_for_task970_sherliic_textual_entailment": 57.0, + "eval_f1_for_textual_entailment": 44.2083, + "eval_f1_for_title_generation": 36.0469, + "eval_f1_for_word_analogy": 45.125, + "eval_gen_len": 9.104, + "eval_global_step": 500, + "eval_loss": 1.0391590595245361, + "eval_rouge1": 51.2662, + "eval_rouge1_for_answerability_classification": 54.0, + "eval_rouge1_for_cause_effect_classification": 58.0562, + "eval_rouge1_for_coreference_resolution": 46.8929, + "eval_rouge1_for_data_to_text": 58.2292, + "eval_rouge1_for_dialogue_act_recognition": 54.5361, + "eval_rouge1_for_grammar_error_correction": 61.9337, + "eval_rouge1_for_keyword_tagging": 63.5635, + "eval_rouge1_for_overlap_extraction": 41.8025, + "eval_rouge1_for_question_rewriting": 72.7657, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 50.9, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.2561, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.8504, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.3222, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 62.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.6029, + "eval_rouge1_for_task1152_bard_word_analogy": 24.0, + "eval_rouge1_for_task1153_bard_word_analogy": 30.0, + "eval_rouge1_for_task1154_bard_word_analogy": 27.0, + "eval_rouge1_for_task1155_bard_word_analogy": 59.0, + "eval_rouge1_for_task1156_bard_word_analogy": 67.0, + "eval_rouge1_for_task1157_bard_word_analogy": 61.0, + "eval_rouge1_for_task1158_bard_word_analogy": 58.0, + "eval_rouge1_for_task1159_bard_word_analogy": 35.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.1717, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.2282, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.4082, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.8237, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.3737, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.5496, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.8123, + "eval_rouge1_for_task1385_anli_textual_entailment": 25.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 60.5857, + "eval_rouge1_for_task1407_dart_data_to_text": 42.2077, + "eval_rouge1_for_task1409_dart_data_to_text": 56.347, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.8693, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 31.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 64.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.3783, + "eval_rouge1_for_task1554_scitail_textual_entailment": 62.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9982, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.4438, + "eval_rouge1_for_task1586_scifact_title_generation": 39.0976, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.6475, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.5278, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.2919, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 47.5256, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.131, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 64.4752, + "eval_rouge1_for_task190_snli_textual_entailment": 49.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 33.5952, + "eval_rouge1_for_task220_rocstories_title_generation": 75.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 54.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.0071, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.9384, + "eval_rouge1_for_task288_gigaword_title_generation": 32.532, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 23.5667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 58.2333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 69.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 88.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.2332, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.5229, + "eval_rouge1_for_task418_persent_title_generation": 30.2033, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.4918, + "eval_rouge1_for_task500_scruples_title_generation": 21.7551, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.7295, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.1939, + "eval_rouge1_for_task602_wikitext_title_generation": 13.3023, + "eval_rouge1_for_task613_liar_keyword_tagging": 37.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 48.4938, + "eval_rouge1_for_task619_ohsumed_title_generation": 43.9015, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.9381, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.0571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 32.0095, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.3238, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.9959, + "eval_rouge1_for_task677_ollie_data_to_text": 38.2842, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 67.0, + "eval_rouge1_for_task743_eurlex_title_generation": 35.8112, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.3757, + "eval_rouge1_for_task769_qed_title_generation": 78.221, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 55.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 47.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 60.319, + "eval_rouge1_for_task892_gap_coreference_resolution": 41.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 58.4591, + "eval_rouge1_for_task970_sherliic_textual_entailment": 57.0, + "eval_rouge1_for_textual_entailment": 46.0417, + "eval_rouge1_for_title_generation": 38.4796, + "eval_rouge1_for_word_analogy": 45.125, + "eval_rougeL": 49.7547, + "eval_rougeL_for_answerability_classification": 54.0, + "eval_rougeL_for_cause_effect_classification": 57.2132, + "eval_rougeL_for_coreference_resolution": 46.8929, + "eval_rougeL_for_data_to_text": 49.5731, + "eval_rougeL_for_dialogue_act_recognition": 54.5361, + "eval_rougeL_for_grammar_error_correction": 61.0723, + "eval_rougeL_for_keyword_tagging": 63.1984, + "eval_rougeL_for_overlap_extraction": 40.848, + "eval_rougeL_for_question_rewriting": 68.986, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 50.9, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.1505, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.8511, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.7302, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 62.0, + "eval_rougeL_for_task102_commongen_data_to_text": 59.4202, + "eval_rougeL_for_task1152_bard_word_analogy": 24.0, + "eval_rougeL_for_task1153_bard_word_analogy": 30.0, + "eval_rougeL_for_task1154_bard_word_analogy": 27.0, + "eval_rougeL_for_task1155_bard_word_analogy": 59.0, + "eval_rougeL_for_task1156_bard_word_analogy": 67.0, + "eval_rougeL_for_task1157_bard_word_analogy": 61.0, + "eval_rougeL_for_task1158_bard_word_analogy": 58.0, + "eval_rougeL_for_task1159_bard_word_analogy": 35.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.2583, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.032, + "eval_rougeL_for_task121_atomic_question_rewriting": 47.033, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.3221, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.214, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.6906, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2321, + "eval_rougeL_for_task1385_anli_textual_entailment": 25.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 44.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 60.5857, + "eval_rougeL_for_task1407_dart_data_to_text": 35.8534, + "eval_rougeL_for_task1409_dart_data_to_text": 45.1723, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.0113, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 31.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 58.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 64.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.0186, + "eval_rougeL_for_task1554_scitail_textual_entailment": 62.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.1333, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.4605, + "eval_rougeL_for_task1586_scifact_title_generation": 32.2847, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.2013, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.231, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.8972, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 43.6516, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.131, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.9403, + "eval_rougeL_for_task190_snli_textual_entailment": 49.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 33.2619, + "eval_rougeL_for_task220_rocstories_title_generation": 75.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 54.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.0071, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.0292, + "eval_rougeL_for_task288_gigaword_title_generation": 28.1243, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 23.5667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 58.2333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 69.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 88.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.2706, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.9696, + "eval_rougeL_for_task418_persent_title_generation": 27.5078, + "eval_rougeL_for_task442_com_qa_question_rewriting": 71.7595, + "eval_rougeL_for_task500_scruples_title_generation": 19.7072, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.16, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.0217, + "eval_rougeL_for_task602_wikitext_title_generation": 13.3023, + "eval_rougeL_for_task613_liar_keyword_tagging": 37.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.5553, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.5047, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.1048, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 85.6571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 32.0095, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.0647, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.0797, + "eval_rougeL_for_task677_ollie_data_to_text": 29.8258, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 67.0, + "eval_rougeL_for_task743_eurlex_title_generation": 30.7422, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.7348, + "eval_rougeL_for_task769_qed_title_generation": 78.221, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 55.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 47.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 60.319, + "eval_rougeL_for_task892_gap_coreference_resolution": 41.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.6724, + "eval_rougeL_for_task970_sherliic_textual_entailment": 57.0, + "eval_rougeL_for_textual_entailment": 46.0417, + "eval_rougeL_for_title_generation": 35.3634, + "eval_rougeL_for_word_analogy": 45.125, + "eval_runtime": 903.9411, + "eval_samples_per_second": 13.176, + "eval_steps_per_second": 0.824, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 1.104, + "step": 1000 + }, + { + "epoch": 0.22, + "eval_exact_match": 31.6037, + "eval_exact_match_for_answerability_classification": 52.4615, + "eval_exact_match_for_cause_effect_classification": 38.5714, + "eval_exact_match_for_coreference_resolution": 39.0, + "eval_exact_match_for_data_to_text": 6.4165, + "eval_exact_match_for_dialogue_act_recognition": 50.1429, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 50.2, + "eval_exact_match_for_overlap_extraction": 20.0, + "eval_exact_match_for_question_rewriting": 1.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 40.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 28.0, + "eval_exact_match_for_task1153_bard_word_analogy": 29.0, + "eval_exact_match_for_task1154_bard_word_analogy": 45.0, + "eval_exact_match_for_task1155_bard_word_analogy": 69.0, + "eval_exact_match_for_task1156_bard_word_analogy": 55.0, + "eval_exact_match_for_task1157_bard_word_analogy": 53.0, + "eval_exact_match_for_task1158_bard_word_analogy": 53.0, + "eval_exact_match_for_task1159_bard_word_analogy": 42.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 39.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 60.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 62.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 67.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 44.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 21.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 8.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 54.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 9.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 63.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 27.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 7.1429, + "eval_exact_match_for_task613_liar_keyword_tagging": 24.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 65.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 68.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 41.5417, + "eval_exact_match_for_title_generation": 9.3049, + "eval_exact_match_for_word_analogy": 46.75, + "eval_f1": 49.2737, + "eval_f1_for_answerability_classification": 55.0256, + "eval_f1_for_cause_effect_classification": 58.4931, + "eval_f1_for_coreference_resolution": 45.6465, + "eval_f1_for_data_to_text": 52.4977, + "eval_f1_for_dialogue_act_recognition": 53.7143, + "eval_f1_for_grammar_error_correction": 57.4171, + "eval_f1_for_keyword_tagging": 62.2341, + "eval_f1_for_overlap_extraction": 46.1397, + "eval_f1_for_question_rewriting": 69.0613, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 52.0, + "eval_f1_for_task034_winogrande_question_rewriting": 66.6799, + "eval_f1_for_task035_winogrande_question_rewriting": 90.2331, + "eval_f1_for_task036_qasc_keyword_tagging": 72.9608, + "eval_f1_for_task039_qasc_overlap_extraction": 42.6667, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 53.6975, + "eval_f1_for_task1152_bard_word_analogy": 28.0, + "eval_f1_for_task1153_bard_word_analogy": 31.0, + "eval_f1_for_task1154_bard_word_analogy": 45.0, + "eval_f1_for_task1155_bard_word_analogy": 69.0, + "eval_f1_for_task1156_bard_word_analogy": 57.6667, + "eval_f1_for_task1157_bard_word_analogy": 53.0, + "eval_f1_for_task1158_bard_word_analogy": 53.0, + "eval_f1_for_task1159_bard_word_analogy": 42.6667, + "eval_f1_for_task1161_coda_19_title_generation": 37.5148, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 75.7183, + "eval_f1_for_task121_atomic_question_rewriting": 51.0559, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.6947, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.1895, + "eval_f1_for_task1356_xlsum_title_generation": 22.4907, + "eval_f1_for_task1358_xlsum_title_generation": 35.3112, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 39.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_f1_for_task1407_dart_data_to_text": 29.1107, + "eval_f1_for_task1409_dart_data_to_text": 50.4214, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1092, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 60.0, + "eval_f1_for_task1516_imppres_textual_entailment": 35.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 62.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.284, + "eval_f1_for_task1554_scitail_textual_entailment": 67.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7249, + "eval_f1_for_task1562_zest_question_rewriting": 57.1801, + "eval_f1_for_task1586_scifact_title_generation": 35.2849, + "eval_f1_for_task1598_nyc_data_to_text": 56.5001, + "eval_f1_for_task1612_sick_textual_entailment": 46.0, + "eval_f1_for_task1615_sick_textual_entailment": 37.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.2826, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 89.4476, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_f1_for_task1659_billsum_title_generation": 37.1533, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.3873, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.2795, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 37.0, + "eval_f1_for_task200_multinli_textual_entailment": 54.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 9.0, + "eval_f1_for_task219_rocstories_title_generation": 21.8773, + "eval_f1_for_task220_rocstories_title_generation": 63.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.9548, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 49.6127, + "eval_f1_for_task288_gigaword_title_generation": 30.8761, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 9.6333, + "eval_f1_for_task329_gap_coreference_resolution": 44.0, + "eval_f1_for_task330_gap_coreference_resolution": 65.5905, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.4586, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 28.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 81.5194, + "eval_f1_for_task418_persent_title_generation": 26.922, + "eval_f1_for_task442_com_qa_question_rewriting": 69.7801, + "eval_f1_for_task500_scruples_title_generation": 21.2111, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 44.6019, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.2512, + "eval_f1_for_task602_wikitext_title_generation": 15.6692, + "eval_f1_for_task613_liar_keyword_tagging": 26.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 41.3264, + "eval_f1_for_task619_ohsumed_title_generation": 42.7984, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.6333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_f1_for_task640_e_snli_textual_entailment": 35.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.9095, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.6147, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.4205, + "eval_f1_for_task677_ollie_data_to_text": 33.9027, + "eval_f1_for_task738_perspectrum_textual_entailment": 65.0, + "eval_f1_for_task743_eurlex_title_generation": 37.5056, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.5711, + "eval_f1_for_task769_qed_title_generation": 79.7968, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 66.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 40.0, + "eval_f1_for_task891_gap_coreference_resolution": 64.9857, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 58.3426, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 41.5417, + "eval_f1_for_title_generation": 35.9157, + "eval_f1_for_word_analogy": 47.4167, + "eval_gen_len": 9.2549, + "eval_global_step": 1000, + "eval_loss": 1.0741527080535889, + "eval_rouge1": 50.8749, + "eval_rouge1_for_answerability_classification": 55.0256, + "eval_rouge1_for_cause_effect_classification": 59.2656, + "eval_rouge1_for_coreference_resolution": 46.0518, + "eval_rouge1_for_data_to_text": 55.2065, + "eval_rouge1_for_dialogue_act_recognition": 56.081, + "eval_rouge1_for_grammar_error_correction": 62.3626, + "eval_rouge1_for_keyword_tagging": 66.9349, + "eval_rouge1_for_overlap_extraction": 49.9207, + "eval_rouge1_for_question_rewriting": 70.6515, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 52.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 66.7044, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.7564, + "eval_rouge1_for_task036_qasc_keyword_tagging": 76.7315, + "eval_rouge1_for_task039_qasc_overlap_extraction": 48.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.2457, + "eval_rouge1_for_task1152_bard_word_analogy": 28.0, + "eval_rouge1_for_task1153_bard_word_analogy": 31.0, + "eval_rouge1_for_task1154_bard_word_analogy": 45.0, + "eval_rouge1_for_task1155_bard_word_analogy": 69.0, + "eval_rouge1_for_task1156_bard_word_analogy": 57.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 53.0, + "eval_rouge1_for_task1158_bard_word_analogy": 53.0, + "eval_rouge1_for_task1159_bard_word_analogy": 42.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 40.8763, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 76.2475, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.2896, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.0028, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.5148, + "eval_rouge1_for_task1356_xlsum_title_generation": 25.6155, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.2754, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 39.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 70.9, + "eval_rouge1_for_task1407_dart_data_to_text": 29.045, + "eval_rouge1_for_task1409_dart_data_to_text": 50.7875, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.0008, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 60.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 62.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.208, + "eval_rouge1_for_task1554_scitail_textual_entailment": 67.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.7243, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.113, + "eval_rouge1_for_task1586_scifact_title_generation": 38.9228, + "eval_rouge1_for_task1598_nyc_data_to_text": 58.0294, + "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.5606, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 89.5528, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.1887, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.3873, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.8279, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 54.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 9.0, + "eval_rouge1_for_task219_rocstories_title_generation": 27.5654, + "eval_rouge1_for_task220_rocstories_title_generation": 63.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.8, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 51.1748, + "eval_rouge1_for_task288_gigaword_title_generation": 34.0743, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.6333, + "eval_rouge1_for_task329_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 65.519, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.4473, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 83.7385, + "eval_rouge1_for_task418_persent_title_generation": 30.4836, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.4797, + "eval_rouge1_for_task500_scruples_title_generation": 23.7922, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 45.126, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.4511, + "eval_rouge1_for_task602_wikitext_title_generation": 15.9134, + "eval_rouge1_for_task613_liar_keyword_tagging": 39.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 46.745, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.9409, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.2, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4095, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.5667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.4084, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.354, + "eval_rouge1_for_task677_ollie_data_to_text": 36.8498, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 65.0, + "eval_rouge1_for_task743_eurlex_title_generation": 39.3995, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.7245, + "eval_rouge1_for_task769_qed_title_generation": 79.6837, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 64.9857, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 60.6994, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 43.2917, + "eval_rouge1_for_title_generation": 38.1712, + "eval_rouge1_for_word_analogy": 47.4167, + "eval_rougeL": 49.4014, + "eval_rougeL_for_answerability_classification": 55.0256, + "eval_rougeL_for_cause_effect_classification": 58.4181, + "eval_rougeL_for_coreference_resolution": 46.0518, + "eval_rougeL_for_data_to_text": 47.3976, + "eval_rougeL_for_dialogue_act_recognition": 56.081, + "eval_rougeL_for_grammar_error_correction": 61.691, + "eval_rougeL_for_keyword_tagging": 66.4663, + "eval_rougeL_for_overlap_extraction": 49.1641, + "eval_rougeL_for_question_rewriting": 67.0051, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 52.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 66.7044, + "eval_rougeL_for_task035_winogrande_question_rewriting": 90.6939, + "eval_rougeL_for_task036_qasc_keyword_tagging": 76.0221, + "eval_rougeL_for_task039_qasc_overlap_extraction": 48.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.7303, + "eval_rougeL_for_task1152_bard_word_analogy": 28.0, + "eval_rougeL_for_task1153_bard_word_analogy": 31.0, + "eval_rougeL_for_task1154_bard_word_analogy": 45.0, + "eval_rougeL_for_task1155_bard_word_analogy": 69.0, + "eval_rougeL_for_task1156_bard_word_analogy": 57.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 53.0, + "eval_rougeL_for_task1158_bard_word_analogy": 53.0, + "eval_rougeL_for_task1159_bard_word_analogy": 42.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 33.2055, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.8159, + "eval_rougeL_for_task121_atomic_question_rewriting": 49.3245, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6182, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4286, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.1294, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.1568, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 39.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 70.9, + "eval_rougeL_for_task1407_dart_data_to_text": 25.1538, + "eval_rougeL_for_task1409_dart_data_to_text": 43.854, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5635, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 60.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 62.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.9619, + "eval_rougeL_for_task1554_scitail_textual_entailment": 67.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.8185, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.5069, + "eval_rougeL_for_task1586_scifact_title_generation": 32.056, + "eval_rougeL_for_task1598_nyc_data_to_text": 43.3668, + "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.4024, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 88.431, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.88, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.3873, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.1501, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 54.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 9.0, + "eval_rougeL_for_task219_rocstories_title_generation": 26.7654, + "eval_rougeL_for_task220_rocstories_title_generation": 63.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.8, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 49.6616, + "eval_rougeL_for_task288_gigaword_title_generation": 29.7179, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.6333, + "eval_rougeL_for_task329_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 65.519, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.2657, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.9844, + "eval_rougeL_for_task418_persent_title_generation": 26.0146, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.0233, + "eval_rougeL_for_task500_scruples_title_generation": 21.9041, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 44.1602, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.4072, + "eval_rougeL_for_task602_wikitext_title_generation": 15.9134, + "eval_rougeL_for_task613_liar_keyword_tagging": 39.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 41.994, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.5188, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.5667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.4095, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.5667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.8054, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.3669, + "eval_rougeL_for_task677_ollie_data_to_text": 30.1796, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 65.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.6714, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.386, + "eval_rougeL_for_task769_qed_title_generation": 79.6837, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 64.9857, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.7584, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 43.2917, + "eval_rougeL_for_title_generation": 34.8217, + "eval_rougeL_for_word_analogy": 47.4167, + "eval_runtime": 950.1542, + "eval_samples_per_second": 12.535, + "eval_steps_per_second": 0.784, + "step": 1000 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 1.023, + "step": 1500 + }, + { + "epoch": 0.33, + "eval_exact_match": 31.5197, + "eval_exact_match_for_answerability_classification": 52.2308, + "eval_exact_match_for_cause_effect_classification": 40.0, + "eval_exact_match_for_coreference_resolution": 40.1429, + "eval_exact_match_for_data_to_text": 6.9007, + "eval_exact_match_for_dialogue_act_recognition": 53.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 46.6, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 2.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 36.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 26.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 78.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 17.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 14.0, + "eval_exact_match_for_task1155_bard_word_analogy": 43.0, + "eval_exact_match_for_task1156_bard_word_analogy": 42.0, + "eval_exact_match_for_task1157_bard_word_analogy": 43.0, + "eval_exact_match_for_task1158_bard_word_analogy": 26.0, + "eval_exact_match_for_task1159_bard_word_analogy": 37.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 54.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 61.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 63.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 37.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 31.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 57.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 46.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 50.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 25.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 41.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 72.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 26.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 14.0, + "eval_exact_match_for_task219_rocstories_title_generation": 10.0, + "eval_exact_match_for_task220_rocstories_title_generation": 77.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 44.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 56.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 2.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 56.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 51.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 78.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 43.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 29.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 81.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 60.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 61.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 56.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 59.0, + "eval_exact_match_for_textual_entailment": 44.75, + "eval_exact_match_for_title_generation": 9.8094, + "eval_exact_match_for_word_analogy": 31.25, + "eval_f1": 49.3105, + "eval_f1_for_answerability_classification": 54.7949, + "eval_f1_for_cause_effect_classification": 59.9168, + "eval_f1_for_coreference_resolution": 47.213, + "eval_f1_for_data_to_text": 53.7952, + "eval_f1_for_dialogue_act_recognition": 57.1429, + "eval_f1_for_grammar_error_correction": 56.4271, + "eval_f1_for_keyword_tagging": 59.5339, + "eval_f1_for_overlap_extraction": 40.598, + "eval_f1_for_question_rewriting": 71.3283, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 57.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.7837, + "eval_f1_for_task035_winogrande_question_rewriting": 89.7696, + "eval_f1_for_task036_qasc_keyword_tagging": 70.4321, + "eval_f1_for_task039_qasc_overlap_extraction": 30.4, + "eval_f1_for_task050_multirc_answerability_classification": 78.0, + "eval_f1_for_task102_commongen_data_to_text": 55.2643, + "eval_f1_for_task1152_bard_word_analogy": 17.0, + "eval_f1_for_task1153_bard_word_analogy": 28.6667, + "eval_f1_for_task1154_bard_word_analogy": 14.0, + "eval_f1_for_task1155_bard_word_analogy": 43.0, + "eval_f1_for_task1156_bard_word_analogy": 43.3333, + "eval_f1_for_task1157_bard_word_analogy": 43.0, + "eval_f1_for_task1158_bard_word_analogy": 26.0, + "eval_f1_for_task1159_bard_word_analogy": 37.6667, + "eval_f1_for_task1161_coda_19_title_generation": 37.3443, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.1098, + "eval_f1_for_task121_atomic_question_rewriting": 50.1207, + "eval_f1_for_task133_winowhy_coreference_resolution": 54.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.4687, + "eval_f1_for_task1344_rte_textual_entailment": 61.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.6031, + "eval_f1_for_task1356_xlsum_title_generation": 19.837, + "eval_f1_for_task1358_xlsum_title_generation": 33.4288, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 63.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 34.3359, + "eval_f1_for_task1409_dart_data_to_text": 53.5712, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6542, + "eval_f1_for_task1439_doqa_answerability_classification": 37.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 31.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.2664, + "eval_f1_for_task1554_scitail_textual_entailment": 57.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2, + "eval_f1_for_task1562_zest_question_rewriting": 55.099, + "eval_f1_for_task1586_scifact_title_generation": 35.4522, + "eval_f1_for_task1598_nyc_data_to_text": 53.5534, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 46.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.846, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.1891, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_f1_for_task1659_billsum_title_generation": 38.5266, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.419, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.0482, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 41.0, + "eval_f1_for_task200_multinli_textual_entailment": 72.0, + "eval_f1_for_task201_multinli_textual_entailment": 26.0, + "eval_f1_for_task202_multinli_textual_entailment": 14.0, + "eval_f1_for_task219_rocstories_title_generation": 22.7302, + "eval_f1_for_task220_rocstories_title_generation": 77.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 46.0, + "eval_f1_for_task233_iirc_answerability_classification": 44.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.2333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 50.7961, + "eval_f1_for_task288_gigaword_title_generation": 26.7779, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.6, + "eval_f1_for_task329_gap_coreference_resolution": 50.0, + "eval_f1_for_task330_gap_coreference_resolution": 63.4524, + "eval_f1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.4018, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 37.5, + "eval_f1_for_task402_grailqa_question_rewriting": 79.7952, + "eval_f1_for_task418_persent_title_generation": 25.707, + "eval_f1_for_task442_com_qa_question_rewriting": 69.5729, + "eval_f1_for_task500_scruples_title_generation": 18.1787, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.6885, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.1964, + "eval_f1_for_task602_wikitext_title_generation": 13.1951, + "eval_f1_for_task613_liar_keyword_tagging": 21.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 44.3492, + "eval_f1_for_task619_ohsumed_title_generation": 44.572, + "eval_f1_for_task620_ohsumed_keyword_tagging": 35.7667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 78.0, + "eval_f1_for_task640_e_snli_textual_entailment": 35.0, + "eval_f1_for_task641_e_snli_textual_entailment": 43.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.8043, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 40.0667, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.0614, + "eval_f1_for_task671_ambigqa_question_rewriting": 66.8505, + "eval_f1_for_task677_ollie_data_to_text": 35.6418, + "eval_f1_for_task738_perspectrum_textual_entailment": 81.0, + "eval_f1_for_task743_eurlex_title_generation": 37.561, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.2334, + "eval_f1_for_task769_qed_title_generation": 75.9821, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 61.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_f1_for_task890_gwsd_textual_entailment": 56.0, + "eval_f1_for_task891_gap_coreference_resolution": 54.2111, + "eval_f1_for_task892_gap_coreference_resolution": 27.0, + "eval_f1_for_task893_gap_coreference_resolution": 27.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 57.1243, + "eval_f1_for_task970_sherliic_textual_entailment": 59.0, + "eval_f1_for_textual_entailment": 44.75, + "eval_f1_for_title_generation": 35.4709, + "eval_f1_for_word_analogy": 31.5833, + "eval_gen_len": 9.2479, + "eval_global_step": 1500, + "eval_loss": 1.1699351072311401, + "eval_rouge1": 50.9359, + "eval_rouge1_for_answerability_classification": 54.7949, + "eval_rouge1_for_cause_effect_classification": 60.7769, + "eval_rouge1_for_coreference_resolution": 47.9745, + "eval_rouge1_for_data_to_text": 56.9847, + "eval_rouge1_for_dialogue_act_recognition": 59.1054, + "eval_rouge1_for_grammar_error_correction": 61.1654, + "eval_rouge1_for_keyword_tagging": 64.9187, + "eval_rouge1_for_overlap_extraction": 44.5008, + "eval_rouge1_for_question_rewriting": 72.8307, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 57.8, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8211, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.3089, + "eval_rouge1_for_task036_qasc_keyword_tagging": 74.5035, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.0667, + "eval_rouge1_for_task050_multirc_answerability_classification": 78.0, + "eval_rouge1_for_task102_commongen_data_to_text": 70.1607, + "eval_rouge1_for_task1152_bard_word_analogy": 17.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 14.0, + "eval_rouge1_for_task1155_bard_word_analogy": 43.0, + "eval_rouge1_for_task1156_bard_word_analogy": 43.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 43.0, + "eval_rouge1_for_task1158_bard_word_analogy": 26.0, + "eval_rouge1_for_task1159_bard_word_analogy": 37.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.7286, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.3517, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.4128, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 54.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.0789, + "eval_rouge1_for_task1344_rte_textual_entailment": 61.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.1388, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.5593, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.5545, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 63.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.0714, + "eval_rouge1_for_task1407_dart_data_to_text": 34.907, + "eval_rouge1_for_task1409_dart_data_to_text": 54.8657, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.1969, + "eval_rouge1_for_task1439_doqa_answerability_classification": 37.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 31.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.3397, + "eval_rouge1_for_task1554_scitail_textual_entailment": 57.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.1339, + "eval_rouge1_for_task1562_zest_question_rewriting": 57.3175, + "eval_rouge1_for_task1586_scifact_title_generation": 38.969, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.1233, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.121, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.2005, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1659_billsum_title_generation": 40.4684, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.419, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.4176, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 41.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 72.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 26.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 14.0, + "eval_rouge1_for_task219_rocstories_title_generation": 25.5699, + "eval_rouge1_for_task220_rocstories_title_generation": 77.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 44.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.4, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 51.9349, + "eval_rouge1_for_task288_gigaword_title_generation": 30.2327, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 12.4333, + "eval_rouge1_for_task329_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 63.3571, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.0272, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 45.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.1728, + "eval_rouge1_for_task418_persent_title_generation": 29.2142, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.1645, + "eval_rouge1_for_task500_scruples_title_generation": 19.6912, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.2642, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.6006, + "eval_rouge1_for_task602_wikitext_title_generation": 13.6027, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 49.7442, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.2049, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.119, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 78.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 43.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.3043, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 40.9, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.8718, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.4566, + "eval_rouge1_for_task677_ollie_data_to_text": 38.4754, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rouge1_for_task743_eurlex_title_generation": 39.1215, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.8832, + "eval_rouge1_for_task769_qed_title_generation": 75.869, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 61.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 51.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 56.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.5, + "eval_rouge1_for_task892_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 59.7539, + "eval_rouge1_for_task970_sherliic_textual_entailment": 59.0, + "eval_rouge1_for_textual_entailment": 46.25, + "eval_rouge1_for_title_generation": 37.7182, + "eval_rouge1_for_word_analogy": 31.5833, + "eval_rougeL": 49.5045, + "eval_rougeL_for_answerability_classification": 54.7949, + "eval_rougeL_for_cause_effect_classification": 59.9965, + "eval_rougeL_for_coreference_resolution": 47.9745, + "eval_rougeL_for_data_to_text": 48.6848, + "eval_rougeL_for_dialogue_act_recognition": 59.1054, + "eval_rougeL_for_grammar_error_correction": 60.4319, + "eval_rougeL_for_keyword_tagging": 64.3635, + "eval_rougeL_for_overlap_extraction": 43.6184, + "eval_rougeL_for_question_rewriting": 69.134, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 57.8, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.6961, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.8799, + "eval_rougeL_for_task036_qasc_keyword_tagging": 73.6274, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.0667, + "eval_rougeL_for_task050_multirc_answerability_classification": 78.0, + "eval_rougeL_for_task102_commongen_data_to_text": 61.3411, + "eval_rougeL_for_task1152_bard_word_analogy": 17.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 14.0, + "eval_rougeL_for_task1155_bard_word_analogy": 43.0, + "eval_rougeL_for_task1156_bard_word_analogy": 43.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 43.0, + "eval_rougeL_for_task1158_bard_word_analogy": 26.0, + "eval_rougeL_for_task1159_bard_word_analogy": 37.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.9558, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.1884, + "eval_rougeL_for_task121_atomic_question_rewriting": 47.6162, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 54.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.5177, + "eval_rougeL_for_task1344_rte_textual_entailment": 61.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.9856, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.8714, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.8975, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 54.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 63.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.0714, + "eval_rougeL_for_task1407_dart_data_to_text": 29.4644, + "eval_rougeL_for_task1409_dart_data_to_text": 44.5456, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.6817, + "eval_rougeL_for_task1439_doqa_answerability_classification": 37.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 31.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 51.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.3921, + "eval_rougeL_for_task1554_scitail_textual_entailment": 57.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.182, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.5532, + "eval_rougeL_for_task1586_scifact_title_generation": 32.1272, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.8155, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.8242, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.2531, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1659_billsum_title_generation": 35.491, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.419, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.3723, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 41.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 72.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 26.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 14.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.5699, + "eval_rougeL_for_task220_rocstories_title_generation": 77.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 44.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.4, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 50.1701, + "eval_rougeL_for_task288_gigaword_title_generation": 26.6105, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 12.4333, + "eval_rougeL_for_task329_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 63.3571, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.0601, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 45.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 67.9875, + "eval_rougeL_for_task418_persent_title_generation": 26.503, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.7561, + "eval_rougeL_for_task500_scruples_title_generation": 18.8577, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.8227, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.1188, + "eval_rougeL_for_task602_wikitext_title_generation": 13.6027, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 45.2489, + "eval_rougeL_for_task619_ohsumed_title_generation": 42.3147, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.219, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 78.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 43.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.3043, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 40.9, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.0907, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 65.8956, + "eval_rougeL_for_task677_ollie_data_to_text": 31.6685, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.7556, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.9186, + "eval_rougeL_for_task769_qed_title_generation": 75.869, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 61.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 51.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 56.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.5, + "eval_rougeL_for_task892_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 58.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 46.1368, + "eval_rougeL_for_task970_sherliic_textual_entailment": 59.0, + "eval_rougeL_for_textual_entailment": 46.25, + "eval_rougeL_for_title_generation": 34.9272, + "eval_rougeL_for_word_analogy": 31.5833, + "eval_runtime": 917.9608, + "eval_samples_per_second": 12.974, + "eval_steps_per_second": 0.812, + "step": 1500 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.0056, + "step": 2000 + }, + { + "epoch": 0.44, + "eval_exact_match": 32.0907, + "eval_exact_match_for_answerability_classification": 54.9231, + "eval_exact_match_for_cause_effect_classification": 45.2857, + "eval_exact_match_for_coreference_resolution": 41.4286, + "eval_exact_match_for_data_to_text": 6.4165, + "eval_exact_match_for_dialogue_act_recognition": 53.8571, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 45.8, + "eval_exact_match_for_overlap_extraction": 17.0, + "eval_exact_match_for_question_rewriting": 4.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 62.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 4.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 19.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 34.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 71.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 26.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 73.0, + "eval_exact_match_for_task1156_bard_word_analogy": 63.0, + "eval_exact_match_for_task1157_bard_word_analogy": 58.0, + "eval_exact_match_for_task1158_bard_word_analogy": 34.0, + "eval_exact_match_for_task1159_bard_word_analogy": 42.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 22.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 52.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 72.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 29.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 47.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 44.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 44.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 39.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 43.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 20.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 8.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 77.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 60.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 56.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 33.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 24.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 9.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 82.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 70.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 51.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, + "eval_exact_match_for_textual_entailment": 39.0, + "eval_exact_match_for_title_generation": 10.2578, + "eval_exact_match_for_word_analogy": 42.375, + "eval_f1": 49.2202, + "eval_f1_for_answerability_classification": 57.4872, + "eval_f1_for_cause_effect_classification": 62.3344, + "eval_f1_for_coreference_resolution": 46.9182, + "eval_f1_for_data_to_text": 54.7852, + "eval_f1_for_dialogue_act_recognition": 56.6429, + "eval_f1_for_grammar_error_correction": 59.8349, + "eval_f1_for_keyword_tagging": 60.2656, + "eval_f1_for_overlap_extraction": 34.7518, + "eval_f1_for_question_rewriting": 69.2147, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 64.5, + "eval_f1_for_task034_winogrande_question_rewriting": 79.674, + "eval_f1_for_task035_winogrande_question_rewriting": 87.1721, + "eval_f1_for_task036_qasc_keyword_tagging": 60.4662, + "eval_f1_for_task039_qasc_overlap_extraction": 42.0667, + "eval_f1_for_task050_multirc_answerability_classification": 71.0, + "eval_f1_for_task102_commongen_data_to_text": 53.477, + "eval_f1_for_task1152_bard_word_analogy": 26.0, + "eval_f1_for_task1153_bard_word_analogy": 28.6667, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 73.0, + "eval_f1_for_task1156_bard_word_analogy": 64.3333, + "eval_f1_for_task1157_bard_word_analogy": 58.0, + "eval_f1_for_task1158_bard_word_analogy": 34.0, + "eval_f1_for_task1159_bard_word_analogy": 42.0, + "eval_f1_for_task1161_coda_19_title_generation": 32.7784, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 83.4197, + "eval_f1_for_task121_atomic_question_rewriting": 48.1497, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.5118, + "eval_f1_for_task1344_rte_textual_entailment": 52.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.6394, + "eval_f1_for_task1356_xlsum_title_generation": 19.3795, + "eval_f1_for_task1358_xlsum_title_generation": 34.3577, + "eval_f1_for_task1385_anli_textual_entailment": 1.0, + "eval_f1_for_task1386_anli_textual_entailment": 1.0, + "eval_f1_for_task1387_anli_textual_entailment": 2.0, + "eval_f1_for_task1388_cb_textual_entailment": 24.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 72.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 50.0, + "eval_f1_for_task1407_dart_data_to_text": 40.8501, + "eval_f1_for_task1409_dart_data_to_text": 53.1736, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.8885, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 46.0, + "eval_f1_for_task1516_imppres_textual_entailment": 29.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 61.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.6576, + "eval_f1_for_task1554_scitail_textual_entailment": 52.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7812, + "eval_f1_for_task1562_zest_question_rewriting": 47.5734, + "eval_f1_for_task1586_scifact_title_generation": 35.2737, + "eval_f1_for_task1598_nyc_data_to_text": 55.2166, + "eval_f1_for_task1612_sick_textual_entailment": 47.0, + "eval_f1_for_task1615_sick_textual_entailment": 44.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.9647, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.4232, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_f1_for_task1659_billsum_title_generation": 34.9177, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 76.2367, + "eval_f1_for_task1728_web_nlg_data_to_text": 65.2847, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 43.0, + "eval_f1_for_task200_multinli_textual_entailment": 84.0, + "eval_f1_for_task201_multinli_textual_entailment": 20.0, + "eval_f1_for_task202_multinli_textual_entailment": 8.0, + "eval_f1_for_task219_rocstories_title_generation": 19.8697, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_f1_for_task232_iirc_answerability_classification": 52.0, + "eval_f1_for_task233_iirc_answerability_classification": 51.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 77.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 71.55, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 27.4368, + "eval_f1_for_task288_gigaword_title_generation": 30.0767, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 6.1667, + "eval_f1_for_task329_gap_coreference_resolution": 43.0, + "eval_f1_for_task330_gap_coreference_resolution": 65.0905, + "eval_f1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.4604, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 35.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 73.6572, + "eval_f1_for_task418_persent_title_generation": 25.4619, + "eval_f1_for_task442_com_qa_question_rewriting": 72.8346, + "eval_f1_for_task500_scruples_title_generation": 18.2797, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.1406, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.3586, + "eval_f1_for_task602_wikitext_title_generation": 15.8829, + "eval_f1_for_task613_liar_keyword_tagging": 26.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 34.5469, + "eval_f1_for_task619_ohsumed_title_generation": 42.4302, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.5667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 37.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.6286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 9.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.4495, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.8271, + "eval_f1_for_task677_ollie_data_to_text": 34.0974, + "eval_f1_for_task738_perspectrum_textual_entailment": 82.0, + "eval_f1_for_task743_eurlex_title_generation": 35.8904, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.4236, + "eval_f1_for_task769_qed_title_generation": 73.9874, + "eval_f1_for_task827_copa_cause_effect_classification": 52.0, + "eval_f1_for_task828_copa_cause_effect_classification": 70.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_f1_for_task890_gwsd_textual_entailment": 51.0, + "eval_f1_for_task891_gap_coreference_resolution": 59.1444, + "eval_f1_for_task892_gap_coreference_resolution": 45.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 55.5927, + "eval_f1_for_task970_sherliic_textual_entailment": 71.0, + "eval_f1_for_textual_entailment": 39.0, + "eval_f1_for_title_generation": 36.0826, + "eval_f1_for_word_analogy": 42.625, + "eval_gen_len": 9.6397, + "eval_global_step": 2000, + "eval_loss": 1.17257821559906, + "eval_rouge1": 51.8862, + "eval_rouge1_for_answerability_classification": 57.4872, + "eval_rouge1_for_cause_effect_classification": 62.7691, + "eval_rouge1_for_coreference_resolution": 47.3599, + "eval_rouge1_for_data_to_text": 57.6468, + "eval_rouge1_for_dialogue_act_recognition": 60.0102, + "eval_rouge1_for_grammar_error_correction": 63.9364, + "eval_rouge1_for_keyword_tagging": 65.6198, + "eval_rouge1_for_overlap_extraction": 39.4024, + "eval_rouge1_for_question_rewriting": 70.8264, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 64.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 79.6896, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.8411, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.7511, + "eval_rouge1_for_task039_qasc_overlap_extraction": 50.0667, + "eval_rouge1_for_task050_multirc_answerability_classification": 71.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.6807, + "eval_rouge1_for_task1152_bard_word_analogy": 26.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 73.0, + "eval_rouge1_for_task1156_bard_word_analogy": 64.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 58.0, + "eval_rouge1_for_task1158_bard_word_analogy": 34.0, + "eval_rouge1_for_task1159_bard_word_analogy": 42.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.9288, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.6767, + "eval_rouge1_for_task121_atomic_question_rewriting": 50.3289, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.0252, + "eval_rouge1_for_task1344_rte_textual_entailment": 52.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.8092, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.9002, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.1607, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 72.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 56.5714, + "eval_rouge1_for_task1407_dart_data_to_text": 41.9488, + "eval_rouge1_for_task1409_dart_data_to_text": 53.7986, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.0591, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 29.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 61.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.3515, + "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8136, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.8685, + "eval_rouge1_for_task1586_scifact_title_generation": 39.3281, + "eval_rouge1_for_task1598_nyc_data_to_text": 56.3352, + "eval_rouge1_for_task1612_sick_textual_entailment": 47.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.2262, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.5199, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.6251, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 76.2367, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 66.726, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 43.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 20.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 8.0, + "eval_rouge1_for_task219_rocstories_title_generation": 25.0143, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 77.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 72.2167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 28.7381, + "eval_rouge1_for_task288_gigaword_title_generation": 32.8861, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rouge1_for_task329_gap_coreference_resolution": 43.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 64.8524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.1905, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 76.648, + "eval_rouge1_for_task418_persent_title_generation": 28.8179, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.9297, + "eval_rouge1_for_task500_scruples_title_generation": 20.9004, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.9006, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.1827, + "eval_rouge1_for_task602_wikitext_title_generation": 16.8874, + "eval_rouge1_for_task613_liar_keyword_tagging": 38.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.8601, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.7312, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.3857, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 9.4, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.5312, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.5413, + "eval_rouge1_for_task677_ollie_data_to_text": 36.96, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 86.0, + "eval_rouge1_for_task743_eurlex_title_generation": 37.9785, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.9551, + "eval_rouge1_for_task769_qed_title_generation": 73.541, + "eval_rouge1_for_task827_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 70.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 51.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 59.4333, + "eval_rouge1_for_task892_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.6454, + "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, + "eval_rouge1_for_textual_entailment": 45.4306, + "eval_rouge1_for_title_generation": 38.5907, + "eval_rouge1_for_word_analogy": 42.625, + "eval_rougeL": 50.3697, + "eval_rougeL_for_answerability_classification": 57.4872, + "eval_rougeL_for_cause_effect_classification": 62.2039, + "eval_rougeL_for_coreference_resolution": 47.3599, + "eval_rougeL_for_data_to_text": 48.9976, + "eval_rougeL_for_dialogue_act_recognition": 60.0102, + "eval_rougeL_for_grammar_error_correction": 62.9821, + "eval_rougeL_for_keyword_tagging": 65.0449, + "eval_rougeL_for_overlap_extraction": 38.9015, + "eval_rougeL_for_question_rewriting": 67.3025, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 64.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 79.5133, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.3967, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.71, + "eval_rougeL_for_task039_qasc_overlap_extraction": 50.0667, + "eval_rougeL_for_task050_multirc_answerability_classification": 71.0, + "eval_rougeL_for_task102_commongen_data_to_text": 60.2588, + "eval_rougeL_for_task1152_bard_word_analogy": 26.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 73.0, + "eval_rougeL_for_task1156_bard_word_analogy": 64.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 58.0, + "eval_rougeL_for_task1158_bard_word_analogy": 34.0, + "eval_rougeL_for_task1159_bard_word_analogy": 42.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.0001, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 82.5875, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.9655, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.2344, + "eval_rougeL_for_task1344_rte_textual_entailment": 52.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.6149, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.3047, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.7671, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 72.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 56.5714, + "eval_rougeL_for_task1407_dart_data_to_text": 34.4873, + "eval_rougeL_for_task1409_dart_data_to_text": 45.9111, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.1838, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 29.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 61.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.5294, + "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7803, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.4217, + "eval_rougeL_for_task1586_scifact_title_generation": 32.4334, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.0642, + "eval_rougeL_for_task1612_sick_textual_entailment": 47.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.9294, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.5548, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.6998, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 76.2367, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.8724, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 43.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 20.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 8.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.0143, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 77.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 72.2167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 27.7364, + "eval_rougeL_for_task288_gigaword_title_generation": 28.7456, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 6.1667, + "eval_rougeL_for_task329_gap_coreference_resolution": 43.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 64.8524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.4268, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.1411, + "eval_rougeL_for_task418_persent_title_generation": 25.1651, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8484, + "eval_rougeL_for_task500_scruples_title_generation": 19.3537, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.0835, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.8349, + "eval_rougeL_for_task602_wikitext_title_generation": 16.8874, + "eval_rougeL_for_task613_liar_keyword_tagging": 38.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 33.667, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.8135, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.5524, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.1286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 9.4, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.4801, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.4285, + "eval_rougeL_for_task677_ollie_data_to_text": 30.0152, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 86.0, + "eval_rougeL_for_task743_eurlex_title_generation": 33.0788, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.3194, + "eval_rougeL_for_task769_qed_title_generation": 73.541, + "eval_rougeL_for_task827_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 70.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 51.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 59.4333, + "eval_rougeL_for_task892_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.1736, + "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, + "eval_rougeL_for_textual_entailment": 45.4306, + "eval_rougeL_for_title_generation": 35.1897, + "eval_rougeL_for_word_analogy": 42.625, + "eval_runtime": 988.9113, + "eval_samples_per_second": 12.044, + "eval_steps_per_second": 0.753, + "step": 2000 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 0.9702, + "step": 2500 + }, + { + "epoch": 0.55, + "eval_exact_match": 33.3921, + "eval_exact_match_for_answerability_classification": 54.7692, + "eval_exact_match_for_cause_effect_classification": 49.0, + "eval_exact_match_for_coreference_resolution": 43.2857, + "eval_exact_match_for_data_to_text": 8.2324, + "eval_exact_match_for_dialogue_act_recognition": 53.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 37.2, + "eval_exact_match_for_overlap_extraction": 15.5, + "eval_exact_match_for_question_rewriting": 3.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 58.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 62.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 8.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 31.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 68.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 37.0, + "eval_exact_match_for_task1153_bard_word_analogy": 25.0, + "eval_exact_match_for_task1154_bard_word_analogy": 20.0, + "eval_exact_match_for_task1155_bard_word_analogy": 81.0, + "eval_exact_match_for_task1156_bard_word_analogy": 61.0, + "eval_exact_match_for_task1157_bard_word_analogy": 56.0, + "eval_exact_match_for_task1158_bard_word_analogy": 48.0, + "eval_exact_match_for_task1159_bard_word_analogy": 41.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 15.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 54.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 52.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 2.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 82.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 56.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 47.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 44.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 50.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 62.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 39.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 41.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 88.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 17.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 4.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 71.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 61.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 65.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 54.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 69.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 83.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 79.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 77.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 69.0, + "eval_exact_match_for_textual_entailment": 44.0, + "eval_exact_match_for_title_generation": 10.3139, + "eval_exact_match_for_word_analogy": 46.125, + "eval_f1": 50.9056, + "eval_f1_for_answerability_classification": 57.3333, + "eval_f1_for_cause_effect_classification": 67.7552, + "eval_f1_for_coreference_resolution": 48.0037, + "eval_f1_for_data_to_text": 54.7884, + "eval_f1_for_dialogue_act_recognition": 55.5, + "eval_f1_for_grammar_error_correction": 58.0464, + "eval_f1_for_keyword_tagging": 54.1378, + "eval_f1_for_overlap_extraction": 37.8852, + "eval_f1_for_question_rewriting": 70.2608, + "eval_f1_for_task020_mctaco_answerability_classification": 58.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 65.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 88.6889, + "eval_f1_for_task035_winogrande_question_rewriting": 85.9271, + "eval_f1_for_task036_qasc_keyword_tagging": 57.0368, + "eval_f1_for_task039_qasc_overlap_extraction": 40.3333, + "eval_f1_for_task050_multirc_answerability_classification": 68.0, + "eval_f1_for_task102_commongen_data_to_text": 55.672, + "eval_f1_for_task1152_bard_word_analogy": 37.0, + "eval_f1_for_task1153_bard_word_analogy": 25.6667, + "eval_f1_for_task1154_bard_word_analogy": 20.0, + "eval_f1_for_task1155_bard_word_analogy": 81.0, + "eval_f1_for_task1156_bard_word_analogy": 63.0, + "eval_f1_for_task1157_bard_word_analogy": 56.0, + "eval_f1_for_task1158_bard_word_analogy": 48.0, + "eval_f1_for_task1159_bard_word_analogy": 41.6667, + "eval_f1_for_task1161_coda_19_title_generation": 36.5218, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.7685, + "eval_f1_for_task121_atomic_question_rewriting": 49.4024, + "eval_f1_for_task133_winowhy_coreference_resolution": 54.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.7787, + "eval_f1_for_task1344_rte_textual_entailment": 52.0, + "eval_f1_for_task1345_qqp_question_rewriting": 41.53, + "eval_f1_for_task1356_xlsum_title_generation": 19.826, + "eval_f1_for_task1358_xlsum_title_generation": 33.2321, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 82.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_f1_for_task1407_dart_data_to_text": 36.9446, + "eval_f1_for_task1409_dart_data_to_text": 54.0495, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.8551, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 56.0, + "eval_f1_for_task1516_imppres_textual_entailment": 47.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 44.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.3545, + "eval_f1_for_task1554_scitail_textual_entailment": 52.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.2376, + "eval_f1_for_task1562_zest_question_rewriting": 47.5141, + "eval_f1_for_task1586_scifact_title_generation": 36.9699, + "eval_f1_for_task1598_nyc_data_to_text": 53.4164, + "eval_f1_for_task1612_sick_textual_entailment": 50.0, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.058, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 97.5672, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 37.2846, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.0, + "eval_f1_for_task1728_web_nlg_data_to_text": 64.7431, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 41.0, + "eval_f1_for_task200_multinli_textual_entailment": 88.0, + "eval_f1_for_task201_multinli_textual_entailment": 17.0, + "eval_f1_for_task202_multinli_textual_entailment": 4.0, + "eval_f1_for_task219_rocstories_title_generation": 18.785, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 70.8833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.437, + "eval_f1_for_task288_gigaword_title_generation": 27.1339, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_f1_for_task329_gap_coreference_resolution": 54.0, + "eval_f1_for_task330_gap_coreference_resolution": 72.5905, + "eval_f1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.5599, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 22.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 74.6791, + "eval_f1_for_task418_persent_title_generation": 26.2653, + "eval_f1_for_task442_com_qa_question_rewriting": 72.4252, + "eval_f1_for_task500_scruples_title_generation": 20.1525, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9856, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.1172, + "eval_f1_for_task602_wikitext_title_generation": 13.13, + "eval_f1_for_task613_liar_keyword_tagging": 23.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 45.0598, + "eval_f1_for_task619_ohsumed_title_generation": 44.6327, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.5667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 39.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.0857, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.8014, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.0745, + "eval_f1_for_task677_ollie_data_to_text": 34.9212, + "eval_f1_for_task738_perspectrum_textual_entailment": 79.0, + "eval_f1_for_task743_eurlex_title_generation": 39.7274, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.866, + "eval_f1_for_task769_qed_title_generation": 77.5813, + "eval_f1_for_task827_copa_cause_effect_classification": 77.0, + "eval_f1_for_task828_copa_cause_effect_classification": 66.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.0, + "eval_f1_for_task890_gwsd_textual_entailment": 44.0, + "eval_f1_for_task891_gap_coreference_resolution": 63.2444, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 54.2333, + "eval_f1_for_task970_sherliic_textual_entailment": 69.0, + "eval_f1_for_textual_entailment": 44.0, + "eval_f1_for_title_generation": 37.353, + "eval_f1_for_word_analogy": 46.5417, + "eval_gen_len": 9.186, + "eval_global_step": 2500, + "eval_loss": 1.1172577142715454, + "eval_rouge1": 52.5972, + "eval_rouge1_for_answerability_classification": 57.3333, + "eval_rouge1_for_cause_effect_classification": 68.6536, + "eval_rouge1_for_coreference_resolution": 48.4121, + "eval_rouge1_for_data_to_text": 57.6871, + "eval_rouge1_for_dialogue_act_recognition": 57.9456, + "eval_rouge1_for_grammar_error_correction": 63.0033, + "eval_rouge1_for_keyword_tagging": 59.7296, + "eval_rouge1_for_overlap_extraction": 42.7334, + "eval_rouge1_for_question_rewriting": 72.0289, + "eval_rouge1_for_task020_mctaco_answerability_classification": 58.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 65.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.7588, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.5428, + "eval_rouge1_for_task036_qasc_keyword_tagging": 61.2767, + "eval_rouge1_for_task039_qasc_overlap_extraction": 48.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 68.0, + "eval_rouge1_for_task102_commongen_data_to_text": 69.1122, + "eval_rouge1_for_task1152_bard_word_analogy": 37.0, + "eval_rouge1_for_task1153_bard_word_analogy": 26.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 20.0, + "eval_rouge1_for_task1155_bard_word_analogy": 81.0, + "eval_rouge1_for_task1156_bard_word_analogy": 63.0, + "eval_rouge1_for_task1157_bard_word_analogy": 56.0, + "eval_rouge1_for_task1158_bard_word_analogy": 48.0, + "eval_rouge1_for_task1159_bard_word_analogy": 41.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 40.1868, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.9967, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.3254, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 54.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.2232, + "eval_rouge1_for_task1344_rte_textual_entailment": 52.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.6701, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.7727, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.6455, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 82.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 71.4524, + "eval_rouge1_for_task1407_dart_data_to_text": 37.5816, + "eval_rouge1_for_task1409_dart_data_to_text": 54.8789, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.409, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 56.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 47.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.2474, + "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.5977, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.688, + "eval_rouge1_for_task1586_scifact_title_generation": 41.2628, + "eval_rouge1_for_task1598_nyc_data_to_text": 55.5976, + "eval_rouge1_for_task1612_sick_textual_entailment": 50.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.3347, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 97.6815, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.4078, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.0, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 66.4413, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 41.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 88.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 17.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 4.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.5223, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 71.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 36.8001, + "eval_rouge1_for_task288_gigaword_title_generation": 30.666, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 72.3524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.9225, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 27.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.3, + "eval_rouge1_for_task418_persent_title_generation": 29.9023, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.5974, + "eval_rouge1_for_task500_scruples_title_generation": 22.5808, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.4956, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.4964, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3976, + "eval_rouge1_for_task613_liar_keyword_tagging": 38.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 50.9862, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.3881, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.4524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.5857, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.0421, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.0614, + "eval_rouge1_for_task677_ollie_data_to_text": 37.6243, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rouge1_for_task743_eurlex_title_generation": 41.6944, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.0357, + "eval_rouge1_for_task769_qed_title_generation": 77.5146, + "eval_rouge1_for_task827_copa_cause_effect_classification": 77.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 63.5333, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 56.529, + "eval_rouge1_for_task970_sherliic_textual_entailment": 69.0, + "eval_rouge1_for_textual_entailment": 45.4444, + "eval_rouge1_for_title_generation": 39.916, + "eval_rouge1_for_word_analogy": 46.6667, + "eval_rougeL": 51.1383, + "eval_rougeL_for_answerability_classification": 57.3333, + "eval_rougeL_for_cause_effect_classification": 67.9872, + "eval_rougeL_for_coreference_resolution": 48.4121, + "eval_rougeL_for_data_to_text": 49.1992, + "eval_rougeL_for_dialogue_act_recognition": 57.9456, + "eval_rougeL_for_grammar_error_correction": 62.2733, + "eval_rougeL_for_keyword_tagging": 58.9371, + "eval_rougeL_for_overlap_extraction": 42.1729, + "eval_rougeL_for_question_rewriting": 68.6136, + "eval_rougeL_for_task020_mctaco_answerability_classification": 58.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 65.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.5335, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.5501, + "eval_rougeL_for_task036_qasc_keyword_tagging": 59.7307, + "eval_rougeL_for_task039_qasc_overlap_extraction": 48.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 68.0, + "eval_rougeL_for_task102_commongen_data_to_text": 60.8814, + "eval_rougeL_for_task1152_bard_word_analogy": 37.0, + "eval_rougeL_for_task1153_bard_word_analogy": 26.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 20.0, + "eval_rougeL_for_task1155_bard_word_analogy": 81.0, + "eval_rougeL_for_task1156_bard_word_analogy": 63.0, + "eval_rougeL_for_task1157_bard_word_analogy": 56.0, + "eval_rougeL_for_task1158_bard_word_analogy": 48.0, + "eval_rougeL_for_task1159_bard_word_analogy": 41.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.0638, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 82.0672, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.4939, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 54.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6476, + "eval_rougeL_for_task1344_rte_textual_entailment": 52.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.6847, + "eval_rougeL_for_task1356_xlsum_title_generation": 20.5701, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.7299, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 82.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 71.4524, + "eval_rougeL_for_task1407_dart_data_to_text": 31.0427, + "eval_rougeL_for_task1409_dart_data_to_text": 46.4882, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.8137, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 56.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 47.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 52.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.7766, + "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7328, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.0918, + "eval_rougeL_for_task1586_scifact_title_generation": 35.2675, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.2794, + "eval_rougeL_for_task1612_sick_textual_entailment": 50.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.1772, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.6559, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 34.2634, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.0, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 57.7602, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 41.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 88.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 17.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 4.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.5223, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 71.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 35.6792, + "eval_rougeL_for_task288_gigaword_title_generation": 26.7837, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 72.3524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 54.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.0799, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 27.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.4507, + "eval_rougeL_for_task418_persent_title_generation": 26.2185, + "eval_rougeL_for_task442_com_qa_question_rewriting": 71.4192, + "eval_rougeL_for_task500_scruples_title_generation": 21.518, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.7869, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.951, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3976, + "eval_rougeL_for_task613_liar_keyword_tagging": 38.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 47.1637, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.6531, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.0357, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 56.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.5857, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.453, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.8281, + "eval_rougeL_for_task677_ollie_data_to_text": 30.3186, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rougeL_for_task743_eurlex_title_generation": 36.8672, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.7085, + "eval_rougeL_for_task769_qed_title_generation": 77.5146, + "eval_rougeL_for_task827_copa_cause_effect_classification": 77.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 63.5333, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 60.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.9949, + "eval_rougeL_for_task970_sherliic_textual_entailment": 69.0, + "eval_rougeL_for_textual_entailment": 45.4444, + "eval_rougeL_for_title_generation": 36.8401, + "eval_rougeL_for_word_analogy": 46.6667, + "eval_runtime": 857.3961, + "eval_samples_per_second": 13.891, + "eval_steps_per_second": 0.869, + "step": 2500 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.9444, + "step": 3000 + }, + { + "epoch": 0.66, + "eval_exact_match": 34.1394, + "eval_exact_match_for_answerability_classification": 56.8462, + "eval_exact_match_for_cause_effect_classification": 49.2857, + "eval_exact_match_for_coreference_resolution": 41.5714, + "eval_exact_match_for_data_to_text": 8.1114, + "eval_exact_match_for_dialogue_act_recognition": 47.5714, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 46.2, + "eval_exact_match_for_overlap_extraction": 18.0, + "eval_exact_match_for_question_rewriting": 1.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 59.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 31.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 36.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 71.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 43.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 88.0, + "eval_exact_match_for_task1156_bard_word_analogy": 53.0, + "eval_exact_match_for_task1157_bard_word_analogy": 58.0, + "eval_exact_match_for_task1158_bard_word_analogy": 43.0, + "eval_exact_match_for_task1159_bard_word_analogy": 42.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 48.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 53.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 27.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 88.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 42.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 60.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 64.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 53.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 52.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 60.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_exact_match_for_task1659_billsum_title_generation": 6.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 8.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 2.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 83.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 6.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 77.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 56.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 53.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 77.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 23.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 87.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 64.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 87.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 63.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 49.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 77.0, + "eval_exact_match_for_textual_entailment": 46.75, + "eval_exact_match_for_title_generation": 10.9305, + "eval_exact_match_for_word_analogy": 47.375, + "eval_f1": 51.8419, + "eval_f1_for_answerability_classification": 59.4103, + "eval_f1_for_cause_effect_classification": 68.931, + "eval_f1_for_coreference_resolution": 48.0063, + "eval_f1_for_data_to_text": 52.1584, + "eval_f1_for_dialogue_act_recognition": 50.4286, + "eval_f1_for_grammar_error_correction": 58.0488, + "eval_f1_for_keyword_tagging": 61.5515, + "eval_f1_for_overlap_extraction": 38.6159, + "eval_f1_for_question_rewriting": 71.7361, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 60.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 91.2137, + "eval_f1_for_task035_winogrande_question_rewriting": 88.8821, + "eval_f1_for_task036_qasc_keyword_tagging": 67.6249, + "eval_f1_for_task039_qasc_overlap_extraction": 42.9, + "eval_f1_for_task050_multirc_answerability_classification": 71.0, + "eval_f1_for_task102_commongen_data_to_text": 53.597, + "eval_f1_for_task1152_bard_word_analogy": 43.0, + "eval_f1_for_task1153_bard_word_analogy": 29.3333, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 88.0, + "eval_f1_for_task1156_bard_word_analogy": 53.6667, + "eval_f1_for_task1157_bard_word_analogy": 58.0, + "eval_f1_for_task1158_bard_word_analogy": 43.0, + "eval_f1_for_task1159_bard_word_analogy": 42.6667, + "eval_f1_for_task1161_coda_19_title_generation": 37.5845, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1158, + "eval_f1_for_task121_atomic_question_rewriting": 49.747, + "eval_f1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.5219, + "eval_f1_for_task1344_rte_textual_entailment": 53.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.2967, + "eval_f1_for_task1356_xlsum_title_generation": 22.6573, + "eval_f1_for_task1358_xlsum_title_generation": 32.5235, + "eval_f1_for_task1385_anli_textual_entailment": 27.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 37.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 88.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_f1_for_task1407_dart_data_to_text": 34.868, + "eval_f1_for_task1409_dart_data_to_text": 50.3842, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.7645, + "eval_f1_for_task1439_doqa_answerability_classification": 42.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 60.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.1676, + "eval_f1_for_task1554_scitail_textual_entailment": 64.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.333, + "eval_f1_for_task1562_zest_question_rewriting": 56.5183, + "eval_f1_for_task1586_scifact_title_generation": 39.4453, + "eval_f1_for_task1598_nyc_data_to_text": 52.3314, + "eval_f1_for_task1612_sick_textual_entailment": 53.0, + "eval_f1_for_task1615_sick_textual_entailment": 52.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.0554, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.2683, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_f1_for_task1659_billsum_title_generation": 38.3196, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.9143, + "eval_f1_for_task1728_web_nlg_data_to_text": 61.4708, + "eval_f1_for_task190_snli_textual_entailment": 2.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 83.0, + "eval_f1_for_task201_multinli_textual_entailment": 10.0, + "eval_f1_for_task202_multinli_textual_entailment": 6.0, + "eval_f1_for_task219_rocstories_title_generation": 15.5349, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_f1_for_task232_iirc_answerability_classification": 52.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 77.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.3833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 34.3318, + "eval_f1_for_task288_gigaword_title_generation": 29.76, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_f1_for_task329_gap_coreference_resolution": 54.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.5905, + "eval_f1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 79.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.7439, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 36.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 79.3928, + "eval_f1_for_task418_persent_title_generation": 26.7414, + "eval_f1_for_task442_com_qa_question_rewriting": 70.9365, + "eval_f1_for_task500_scruples_title_generation": 20.5258, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.9579, + "eval_f1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.2035, + "eval_f1_for_task602_wikitext_title_generation": 14.3057, + "eval_f1_for_task613_liar_keyword_tagging": 24.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 44.7732, + "eval_f1_for_task619_ohsumed_title_generation": 44.3575, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.9333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_f1_for_task640_e_snli_textual_entailment": 29.0, + "eval_f1_for_task641_e_snli_textual_entailment": 38.0, + "eval_f1_for_task642_e_snli_textual_entailment": 40.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.5325, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 23.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.3295, + "eval_f1_for_task671_ambigqa_question_rewriting": 70.6096, + "eval_f1_for_task677_ollie_data_to_text": 31.6867, + "eval_f1_for_task738_perspectrum_textual_entailment": 87.0, + "eval_f1_for_task743_eurlex_title_generation": 36.3831, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.9343, + "eval_f1_for_task769_qed_title_generation": 86.0469, + "eval_f1_for_task827_copa_cause_effect_classification": 87.0, + "eval_f1_for_task828_copa_cause_effect_classification": 63.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 49.0, + "eval_f1_for_task891_gap_coreference_resolution": 64.5333, + "eval_f1_for_task892_gap_coreference_resolution": 38.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 49.6787, + "eval_f1_for_task970_sherliic_textual_entailment": 77.0, + "eval_f1_for_textual_entailment": 46.75, + "eval_f1_for_title_generation": 37.5419, + "eval_f1_for_word_analogy": 47.7083, + "eval_gen_len": 9.1588, + "eval_global_step": 3000, + "eval_loss": 1.1731315851211548, + "eval_rouge1": 53.5084, + "eval_rouge1_for_answerability_classification": 59.4103, + "eval_rouge1_for_cause_effect_classification": 69.6597, + "eval_rouge1_for_coreference_resolution": 48.4702, + "eval_rouge1_for_data_to_text": 55.2527, + "eval_rouge1_for_dialogue_act_recognition": 54.3889, + "eval_rouge1_for_grammar_error_correction": 62.9984, + "eval_rouge1_for_keyword_tagging": 66.4397, + "eval_rouge1_for_overlap_extraction": 42.232, + "eval_rouge1_for_question_rewriting": 73.3424, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 60.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.2535, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.5271, + "eval_rouge1_for_task036_qasc_keyword_tagging": 72.6138, + "eval_rouge1_for_task039_qasc_overlap_extraction": 48.5667, + "eval_rouge1_for_task050_multirc_answerability_classification": 71.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.5795, + "eval_rouge1_for_task1152_bard_word_analogy": 43.0, + "eval_rouge1_for_task1153_bard_word_analogy": 29.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 88.0, + "eval_rouge1_for_task1156_bard_word_analogy": 53.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 58.0, + "eval_rouge1_for_task1158_bard_word_analogy": 43.0, + "eval_rouge1_for_task1159_bard_word_analogy": 42.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.3106, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2797, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.4161, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.8771, + "eval_rouge1_for_task1344_rte_textual_entailment": 53.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2954, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.2024, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.3387, + "eval_rouge1_for_task1385_anli_textual_entailment": 27.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 88.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.3889, + "eval_rouge1_for_task1407_dart_data_to_text": 36.669, + "eval_rouge1_for_task1409_dart_data_to_text": 51.137, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.5342, + "eval_rouge1_for_task1439_doqa_answerability_classification": 42.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 60.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.5893, + "eval_rouge1_for_task1554_scitail_textual_entailment": 64.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4625, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.5324, + "eval_rouge1_for_task1586_scifact_title_generation": 43.6173, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.5634, + "eval_rouge1_for_task1612_sick_textual_entailment": 53.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 84.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.3201, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.5269, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.9793, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.9143, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.6628, + "eval_rouge1_for_task190_snli_textual_entailment": 2.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 83.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 6.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.0722, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 77.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 62.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.8973, + "eval_rouge1_for_task288_gigaword_title_generation": 33.1777, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.519, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 79.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.1019, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.4847, + "eval_rouge1_for_task418_persent_title_generation": 30.5055, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.4808, + "eval_rouge1_for_task500_scruples_title_generation": 21.7962, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.4106, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 38.3518, + "eval_rouge1_for_task602_wikitext_title_generation": 15.1409, + "eval_rouge1_for_task613_liar_keyword_tagging": 37.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 49.5157, + "eval_rouge1_for_task619_ohsumed_title_generation": 47.7972, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.5524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0325, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 23.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.0598, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 71.1164, + "eval_rouge1_for_task677_ollie_data_to_text": 34.6553, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 87.0, + "eval_rouge1_for_task743_eurlex_title_generation": 38.2954, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4652, + "eval_rouge1_for_task769_qed_title_generation": 86.0811, + "eval_rouge1_for_task827_copa_cause_effect_classification": 87.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 63.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 49.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 64.7667, + "eval_rouge1_for_task892_gap_coreference_resolution": 38.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.912, + "eval_rouge1_for_task970_sherliic_textual_entailment": 77.0, + "eval_rouge1_for_textual_entailment": 48.0833, + "eval_rouge1_for_title_generation": 39.9171, + "eval_rouge1_for_word_analogy": 47.7083, + "eval_rougeL": 52.1734, + "eval_rougeL_for_answerability_classification": 59.4103, + "eval_rougeL_for_cause_effect_classification": 69.1074, + "eval_rougeL_for_coreference_resolution": 48.4702, + "eval_rougeL_for_data_to_text": 48.317, + "eval_rougeL_for_dialogue_act_recognition": 54.3889, + "eval_rougeL_for_grammar_error_correction": 62.1284, + "eval_rougeL_for_keyword_tagging": 66.0953, + "eval_rougeL_for_overlap_extraction": 41.6549, + "eval_rougeL_for_question_rewriting": 69.6927, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 60.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.0545, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.3201, + "eval_rougeL_for_task036_qasc_keyword_tagging": 72.0582, + "eval_rougeL_for_task039_qasc_overlap_extraction": 48.5667, + "eval_rougeL_for_task050_multirc_answerability_classification": 71.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.4865, + "eval_rougeL_for_task1152_bard_word_analogy": 43.0, + "eval_rougeL_for_task1153_bard_word_analogy": 29.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 88.0, + "eval_rougeL_for_task1156_bard_word_analogy": 53.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 58.0, + "eval_rougeL_for_task1158_bard_word_analogy": 43.0, + "eval_rougeL_for_task1159_bard_word_analogy": 42.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.4917, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.5681, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.1076, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.0271, + "eval_rougeL_for_task1344_rte_textual_entailment": 53.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.2062, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.1659, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.1824, + "eval_rougeL_for_task1385_anli_textual_entailment": 27.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 88.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.3889, + "eval_rougeL_for_task1407_dart_data_to_text": 32.6581, + "eval_rougeL_for_task1409_dart_data_to_text": 44.5654, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.7105, + "eval_rougeL_for_task1439_doqa_answerability_classification": 42.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 60.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.4356, + "eval_rougeL_for_task1554_scitail_textual_entailment": 64.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.5464, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.0924, + "eval_rougeL_for_task1586_scifact_title_generation": 37.1408, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.0698, + "eval_rougeL_for_task1612_sick_textual_entailment": 53.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 84.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.0878, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.1353, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 72.0, + "eval_rougeL_for_task1659_billsum_title_generation": 34.7225, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.9143, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.0385, + "eval_rougeL_for_task190_snli_textual_entailment": 2.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 83.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 6.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.0722, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 77.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 62.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.7431, + "eval_rougeL_for_task288_gigaword_title_generation": 28.6461, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.519, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 79.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.2556, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.7598, + "eval_rougeL_for_task418_persent_title_generation": 27.3548, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.4265, + "eval_rougeL_for_task500_scruples_title_generation": 20.9096, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.8725, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.2731, + "eval_rougeL_for_task602_wikitext_title_generation": 15.1409, + "eval_rougeL_for_task613_liar_keyword_tagging": 37.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 46.496, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.1987, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.3857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.0325, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 23.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.5476, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 70.4485, + "eval_rougeL_for_task677_ollie_data_to_text": 28.39, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 87.0, + "eval_rougeL_for_task743_eurlex_title_generation": 33.7421, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.0454, + "eval_rougeL_for_task769_qed_title_generation": 86.0811, + "eval_rougeL_for_task827_copa_cause_effect_classification": 87.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 63.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 49.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 64.7667, + "eval_rougeL_for_task892_gap_coreference_resolution": 38.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 70.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.4431, + "eval_rougeL_for_task970_sherliic_textual_entailment": 77.0, + "eval_rougeL_for_textual_entailment": 48.0833, + "eval_rougeL_for_title_generation": 36.9414, + "eval_rougeL_for_word_analogy": 47.7083, + "eval_runtime": 862.2168, + "eval_samples_per_second": 13.813, + "eval_steps_per_second": 0.864, + "step": 3000 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.9159, + "step": 3500 + }, + { + "epoch": 0.76, + "eval_exact_match": 32.5189, + "eval_exact_match_for_answerability_classification": 56.0769, + "eval_exact_match_for_cause_effect_classification": 51.5714, + "eval_exact_match_for_coreference_resolution": 41.0, + "eval_exact_match_for_data_to_text": 5.0847, + "eval_exact_match_for_dialogue_act_recognition": 49.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 42.8, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 2.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 56.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 8.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 60.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 34.0, + "eval_exact_match_for_task1153_bard_word_analogy": 22.0, + "eval_exact_match_for_task1154_bard_word_analogy": 19.0, + "eval_exact_match_for_task1155_bard_word_analogy": 81.0, + "eval_exact_match_for_task1156_bard_word_analogy": 62.0, + "eval_exact_match_for_task1157_bard_word_analogy": 51.0, + "eval_exact_match_for_task1158_bard_word_analogy": 42.0, + "eval_exact_match_for_task1159_bard_word_analogy": 38.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 58.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 57.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 89.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 56.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 42.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 55.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 56.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 47.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 34.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 69.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 13.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 2.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 77.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 8.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 70.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 64.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 1.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 63.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 58.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 13.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 40.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 65.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 85.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 71.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 51.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 62.0, + "eval_exact_match_for_textual_entailment": 41.625, + "eval_exact_match_for_title_generation": 10.9305, + "eval_exact_match_for_word_analogy": 43.625, + "eval_f1": 50.652, + "eval_f1_for_answerability_classification": 58.641, + "eval_f1_for_cause_effect_classification": 68.0923, + "eval_f1_for_coreference_resolution": 47.3, + "eval_f1_for_data_to_text": 51.613, + "eval_f1_for_dialogue_act_recognition": 53.5714, + "eval_f1_for_grammar_error_correction": 74.5364, + "eval_f1_for_keyword_tagging": 58.9024, + "eval_f1_for_overlap_extraction": 32.5919, + "eval_f1_for_question_rewriting": 72.1492, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 58.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.6207, + "eval_f1_for_task035_winogrande_question_rewriting": 89.4918, + "eval_f1_for_task036_qasc_keyword_tagging": 65.0884, + "eval_f1_for_task039_qasc_overlap_extraction": 32.6333, + "eval_f1_for_task050_multirc_answerability_classification": 60.0, + "eval_f1_for_task102_commongen_data_to_text": 53.5756, + "eval_f1_for_task1152_bard_word_analogy": 34.0, + "eval_f1_for_task1153_bard_word_analogy": 24.0, + "eval_f1_for_task1154_bard_word_analogy": 19.0, + "eval_f1_for_task1155_bard_word_analogy": 81.0, + "eval_f1_for_task1156_bard_word_analogy": 62.6667, + "eval_f1_for_task1157_bard_word_analogy": 51.0, + "eval_f1_for_task1158_bard_word_analogy": 42.0, + "eval_f1_for_task1159_bard_word_analogy": 38.6667, + "eval_f1_for_task1161_coda_19_title_generation": 38.0972, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.8276, + "eval_f1_for_task121_atomic_question_rewriting": 49.847, + "eval_f1_for_task133_winowhy_coreference_resolution": 58.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7485, + "eval_f1_for_task1344_rte_textual_entailment": 57.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.3501, + "eval_f1_for_task1356_xlsum_title_generation": 23.3905, + "eval_f1_for_task1358_xlsum_title_generation": 34.7545, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 89.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 59.0, + "eval_f1_for_task1407_dart_data_to_text": 36.6434, + "eval_f1_for_task1409_dart_data_to_text": 49.6605, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 64.3826, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 56.0, + "eval_f1_for_task1516_imppres_textual_entailment": 42.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.1225, + "eval_f1_for_task1554_scitail_textual_entailment": 55.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.6901, + "eval_f1_for_task1562_zest_question_rewriting": 57.1015, + "eval_f1_for_task1586_scifact_title_generation": 38.696, + "eval_f1_for_task1598_nyc_data_to_text": 51.3856, + "eval_f1_for_task1612_sick_textual_entailment": 56.0, + "eval_f1_for_task1615_sick_textual_entailment": 47.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4359, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 91.0842, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 69.0, + "eval_f1_for_task1659_billsum_title_generation": 37.2091, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 65.4119, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.1543, + "eval_f1_for_task190_snli_textual_entailment": 2.0, + "eval_f1_for_task199_multinli_textual_entailment": 33.0, + "eval_f1_for_task200_multinli_textual_entailment": 77.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 8.0, + "eval_f1_for_task219_rocstories_title_generation": 20.5746, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 70.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.3833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.5505, + "eval_f1_for_task288_gigaword_title_generation": 30.3861, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 6.7333, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 71.019, + "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 72.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 88.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.7303, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 27.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 80.8853, + "eval_f1_for_task418_persent_title_generation": 28.7988, + "eval_f1_for_task442_com_qa_question_rewriting": 71.3524, + "eval_f1_for_task500_scruples_title_generation": 20.6894, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.2854, + "eval_f1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 35.2915, + "eval_f1_for_task602_wikitext_title_generation": 13.8283, + "eval_f1_for_task613_liar_keyword_tagging": 22.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 32.5827, + "eval_f1_for_task619_ohsumed_title_generation": 45.0961, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.0667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_f1_for_task640_e_snli_textual_entailment": 25.0, + "eval_f1_for_task641_e_snli_textual_entailment": 58.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.3571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 16.1667, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.9904, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.7385, + "eval_f1_for_task677_ollie_data_to_text": 31.4619, + "eval_f1_for_task738_perspectrum_textual_entailment": 40.0, + "eval_f1_for_task743_eurlex_title_generation": 38.7305, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.6061, + "eval_f1_for_task769_qed_title_generation": 81.918, + "eval_f1_for_task827_copa_cause_effect_classification": 85.0, + "eval_f1_for_task828_copa_cause_effect_classification": 60.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 71.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, + "eval_f1_for_task890_gwsd_textual_entailment": 51.0, + "eval_f1_for_task891_gap_coreference_resolution": 63.6524, + "eval_f1_for_task892_gap_coreference_resolution": 36.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.9004, + "eval_f1_for_task970_sherliic_textual_entailment": 62.0, + "eval_f1_for_textual_entailment": 41.625, + "eval_f1_for_title_generation": 37.9151, + "eval_f1_for_word_analogy": 44.0417, + "eval_gen_len": 9.049, + "eval_global_step": 3500, + "eval_loss": 1.174275517463684, + "eval_rouge1": 52.4604, + "eval_rouge1_for_answerability_classification": 58.641, + "eval_rouge1_for_cause_effect_classification": 68.4553, + "eval_rouge1_for_coreference_resolution": 47.5905, + "eval_rouge1_for_data_to_text": 54.593, + "eval_rouge1_for_dialogue_act_recognition": 56.1127, + "eval_rouge1_for_grammar_error_correction": 76.4033, + "eval_rouge1_for_keyword_tagging": 63.2609, + "eval_rouge1_for_overlap_extraction": 34.256, + "eval_rouge1_for_question_rewriting": 73.7595, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.6626, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.0839, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.2614, + "eval_rouge1_for_task039_qasc_overlap_extraction": 34.6333, + "eval_rouge1_for_task050_multirc_answerability_classification": 60.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.8912, + "eval_rouge1_for_task1152_bard_word_analogy": 34.0, + "eval_rouge1_for_task1153_bard_word_analogy": 24.0, + "eval_rouge1_for_task1154_bard_word_analogy": 19.0, + "eval_rouge1_for_task1155_bard_word_analogy": 81.0, + "eval_rouge1_for_task1156_bard_word_analogy": 62.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 51.0, + "eval_rouge1_for_task1158_bard_word_analogy": 42.0, + "eval_rouge1_for_task1159_bard_word_analogy": 38.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.7452, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.9947, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.8737, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 58.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.24, + "eval_rouge1_for_task1344_rte_textual_entailment": 57.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.815, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.9634, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.1088, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 89.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.7889, + "eval_rouge1_for_task1407_dart_data_to_text": 37.2101, + "eval_rouge1_for_task1409_dart_data_to_text": 50.124, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 64.999, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 56.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 42.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.9424, + "eval_rouge1_for_task1554_scitail_textual_entailment": 55.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.8077, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.5362, + "eval_rouge1_for_task1586_scifact_title_generation": 42.5504, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.3522, + "eval_rouge1_for_task1612_sick_textual_entailment": 56.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.8364, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 91.1809, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 69.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.5028, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 65.4119, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.8666, + "eval_rouge1_for_task190_snli_textual_entailment": 2.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 77.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 8.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.8405, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 70.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.8788, + "eval_rouge1_for_task288_gigaword_title_generation": 32.9739, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 6.7333, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 71.019, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 72.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 88.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.3163, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 31.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 83.397, + "eval_rouge1_for_task418_persent_title_generation": 32.9613, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.7721, + "eval_rouge1_for_task500_scruples_title_generation": 22.5923, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.8307, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.545, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3838, + "eval_rouge1_for_task613_liar_keyword_tagging": 32.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 34.5375, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.646, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.519, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 58.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.8571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 16.0667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.8124, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.5702, + "eval_rouge1_for_task677_ollie_data_to_text": 35.0369, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.8781, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.004, + "eval_rouge1_for_task769_qed_title_generation": 81.4716, + "eval_rouge1_for_task827_copa_cause_effect_classification": 85.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 71.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 51.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 63.6524, + "eval_rouge1_for_task892_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 53.7153, + "eval_rouge1_for_task970_sherliic_textual_entailment": 62.0, + "eval_rouge1_for_textual_entailment": 44.9306, + "eval_rouge1_for_title_generation": 40.1836, + "eval_rouge1_for_word_analogy": 44.0417, + "eval_rougeL": 51.0518, + "eval_rougeL_for_answerability_classification": 58.641, + "eval_rougeL_for_cause_effect_classification": 68.0965, + "eval_rougeL_for_coreference_resolution": 47.5905, + "eval_rougeL_for_data_to_text": 47.0014, + "eval_rougeL_for_dialogue_act_recognition": 56.1127, + "eval_rougeL_for_grammar_error_correction": 75.2167, + "eval_rougeL_for_keyword_tagging": 62.6558, + "eval_rougeL_for_overlap_extraction": 33.8351, + "eval_rougeL_for_question_rewriting": 70.1469, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.5573, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.8446, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.9027, + "eval_rougeL_for_task039_qasc_overlap_extraction": 34.6333, + "eval_rougeL_for_task050_multirc_answerability_classification": 60.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.634, + "eval_rougeL_for_task1152_bard_word_analogy": 34.0, + "eval_rougeL_for_task1153_bard_word_analogy": 24.0, + "eval_rougeL_for_task1154_bard_word_analogy": 19.0, + "eval_rougeL_for_task1155_bard_word_analogy": 81.0, + "eval_rougeL_for_task1156_bard_word_analogy": 62.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 51.0, + "eval_rougeL_for_task1158_bard_word_analogy": 42.0, + "eval_rougeL_for_task1159_bard_word_analogy": 38.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.6947, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.5918, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.4074, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 58.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.5779, + "eval_rougeL_for_task1344_rte_textual_entailment": 57.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8141, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.7664, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.428, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 89.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.7889, + "eval_rougeL_for_task1407_dart_data_to_text": 32.2893, + "eval_rougeL_for_task1409_dart_data_to_text": 43.7186, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 63.4907, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 56.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 42.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 54.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.6739, + "eval_rougeL_for_task1554_scitail_textual_entailment": 55.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9428, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.8999, + "eval_rougeL_for_task1586_scifact_title_generation": 35.6042, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.9721, + "eval_rougeL_for_task1612_sick_textual_entailment": 56.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.7443, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 90.0909, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 69.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.339, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 65.4119, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.1655, + "eval_rougeL_for_task190_snli_textual_entailment": 2.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 77.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 8.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.4405, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 70.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.0369, + "eval_rougeL_for_task288_gigaword_title_generation": 28.2689, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 6.7333, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 71.019, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 72.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 88.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 34.3369, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 31.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.2687, + "eval_rougeL_for_task418_persent_title_generation": 28.485, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.488, + "eval_rougeL_for_task500_scruples_title_generation": 21.1612, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.7583, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.3609, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3838, + "eval_rougeL_for_task613_liar_keyword_tagging": 32.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 33.005, + "eval_rougeL_for_task619_ohsumed_title_generation": 42.2027, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.8524, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 74.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 58.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.8571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 16.0667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.2233, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.7768, + "eval_rougeL_for_task677_ollie_data_to_text": 28.8008, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.9983, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.2431, + "eval_rougeL_for_task769_qed_title_generation": 81.4716, + "eval_rougeL_for_task827_copa_cause_effect_classification": 85.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 71.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 51.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 63.6524, + "eval_rougeL_for_task892_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.1976, + "eval_rougeL_for_task970_sherliic_textual_entailment": 62.0, + "eval_rougeL_for_textual_entailment": 44.9306, + "eval_rougeL_for_title_generation": 37.0131, + "eval_rougeL_for_word_analogy": 44.0417, + "eval_runtime": 895.5344, + "eval_samples_per_second": 13.299, + "eval_steps_per_second": 0.832, + "step": 3500 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.8838, + "step": 4000 + }, + { + "epoch": 0.87, + "eval_exact_match": 34.6096, + "eval_exact_match_for_answerability_classification": 56.6923, + "eval_exact_match_for_cause_effect_classification": 49.4286, + "eval_exact_match_for_coreference_resolution": 45.0714, + "eval_exact_match_for_data_to_text": 7.6271, + "eval_exact_match_for_dialogue_act_recognition": 50.7143, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 45.6, + "eval_exact_match_for_overlap_extraction": 15.0, + "eval_exact_match_for_question_rewriting": 3.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 58.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 56.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 25.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 30.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 37.0, + "eval_exact_match_for_task1153_bard_word_analogy": 27.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 79.0, + "eval_exact_match_for_task1156_bard_word_analogy": 57.0, + "eval_exact_match_for_task1157_bard_word_analogy": 59.0, + "eval_exact_match_for_task1158_bard_word_analogy": 45.0, + "eval_exact_match_for_task1159_bard_word_analogy": 44.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 14.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 62.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 78.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 76.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 45.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 38.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 67.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 74.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 52.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 56.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 55.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 73.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 27.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 18.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 39.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 68.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 6.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 94.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 26.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 40.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 91.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 60.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 65.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 63.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 25.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 60.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 29.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 44.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 62.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 56.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 63.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 76.0, + "eval_exact_match_for_textual_entailment": 46.7083, + "eval_exact_match_for_title_generation": 10.426, + "eval_exact_match_for_word_analogy": 46.5, + "eval_f1": 52.3751, + "eval_f1_for_answerability_classification": 59.3077, + "eval_f1_for_cause_effect_classification": 67.9827, + "eval_f1_for_coreference_resolution": 51.8139, + "eval_f1_for_data_to_text": 54.142, + "eval_f1_for_dialogue_act_recognition": 53.8571, + "eval_f1_for_grammar_error_correction": 67.4126, + "eval_f1_for_keyword_tagging": 59.4307, + "eval_f1_for_overlap_extraction": 32.1947, + "eval_f1_for_question_rewriting": 70.7426, + "eval_f1_for_task020_mctaco_answerability_classification": 58.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 59.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 83.9029, + "eval_f1_for_task035_winogrande_question_rewriting": 87.7241, + "eval_f1_for_task036_qasc_keyword_tagging": 61.5343, + "eval_f1_for_task039_qasc_overlap_extraction": 32.0, + "eval_f1_for_task050_multirc_answerability_classification": 61.0, + "eval_f1_for_task102_commongen_data_to_text": 51.4961, + "eval_f1_for_task1152_bard_word_analogy": 37.0, + "eval_f1_for_task1153_bard_word_analogy": 27.6667, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 79.0, + "eval_f1_for_task1156_bard_word_analogy": 59.0, + "eval_f1_for_task1157_bard_word_analogy": 59.0, + "eval_f1_for_task1158_bard_word_analogy": 45.0, + "eval_f1_for_task1159_bard_word_analogy": 44.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.604, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.3205, + "eval_f1_for_task121_atomic_question_rewriting": 50.6464, + "eval_f1_for_task133_winowhy_coreference_resolution": 62.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0155, + "eval_f1_for_task1344_rte_textual_entailment": 78.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.313, + "eval_f1_for_task1356_xlsum_title_generation": 26.1827, + "eval_f1_for_task1358_xlsum_title_generation": 34.5584, + "eval_f1_for_task1385_anli_textual_entailment": 26.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 36.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 76.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 39.7751, + "eval_f1_for_task1409_dart_data_to_text": 52.6924, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.0191, + "eval_f1_for_task1439_doqa_answerability_classification": 45.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 38.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 67.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 39.9288, + "eval_f1_for_task1554_scitail_textual_entailment": 74.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.8061, + "eval_f1_for_task1562_zest_question_rewriting": 55.6652, + "eval_f1_for_task1586_scifact_title_generation": 37.0452, + "eval_f1_for_task1598_nyc_data_to_text": 52.4405, + "eval_f1_for_task1612_sick_textual_entailment": 52.0, + "eval_f1_for_task1615_sick_textual_entailment": 56.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.5105, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.5861, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 73.0, + "eval_f1_for_task1659_billsum_title_generation": 37.1512, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 72.1587, + "eval_f1_for_task1728_web_nlg_data_to_text": 66.4367, + "eval_f1_for_task190_snli_textual_entailment": 18.0, + "eval_f1_for_task199_multinli_textual_entailment": 39.0, + "eval_f1_for_task200_multinli_textual_entailment": 68.0, + "eval_f1_for_task201_multinli_textual_entailment": 25.0, + "eval_f1_for_task202_multinli_textual_entailment": 6.0, + "eval_f1_for_task219_rocstories_title_generation": 20.6452, + "eval_f1_for_task220_rocstories_title_generation": 94.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_f1_for_task232_iirc_answerability_classification": 26.0, + "eval_f1_for_task233_iirc_answerability_classification": 40.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 91.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 60.8833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.3894, + "eval_f1_for_task288_gigaword_title_generation": 28.7713, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 7.3333, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 73.6857, + "eval_f1_for_task349_squad2.0_answerability_classification": 60.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 62.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 88.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 36.0531, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_f1_for_task402_grailqa_question_rewriting": 77.9916, + "eval_f1_for_task418_persent_title_generation": 31.4973, + "eval_f1_for_task442_com_qa_question_rewriting": 70.9279, + "eval_f1_for_task500_scruples_title_generation": 18.5534, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.205, + "eval_f1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.058, + "eval_f1_for_task602_wikitext_title_generation": 12.681, + "eval_f1_for_task613_liar_keyword_tagging": 27.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 44.4922, + "eval_f1_for_task619_ohsumed_title_generation": 47.6213, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.1, + "eval_f1_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_f1_for_task640_e_snli_textual_entailment": 33.0, + "eval_f1_for_task641_e_snli_textual_entailment": 60.0, + "eval_f1_for_task642_e_snli_textual_entailment": 37.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.8524, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 45.4667, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.8645, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.3016, + "eval_f1_for_task677_ollie_data_to_text": 34.2781, + "eval_f1_for_task738_perspectrum_textual_entailment": 44.0, + "eval_f1_for_task743_eurlex_title_generation": 39.8296, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.1304, + "eval_f1_for_task769_qed_title_generation": 70.8627, + "eval_f1_for_task827_copa_cause_effect_classification": 84.0, + "eval_f1_for_task828_copa_cause_effect_classification": 60.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, + "eval_f1_for_task890_gwsd_textual_entailment": 56.0, + "eval_f1_for_task891_gap_coreference_resolution": 73.5333, + "eval_f1_for_task892_gap_coreference_resolution": 45.0, + "eval_f1_for_task893_gap_coreference_resolution": 37.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 52.3943, + "eval_f1_for_task970_sherliic_textual_entailment": 76.0, + "eval_f1_for_textual_entailment": 46.7083, + "eval_f1_for_title_generation": 37.6223, + "eval_f1_for_word_analogy": 46.8333, + "eval_gen_len": 9.0433, + "eval_global_step": 4000, + "eval_loss": 1.1663141250610352, + "eval_rouge1": 54.2736, + "eval_rouge1_for_answerability_classification": 59.3077, + "eval_rouge1_for_cause_effect_classification": 68.7796, + "eval_rouge1_for_coreference_resolution": 52.4846, + "eval_rouge1_for_data_to_text": 56.9239, + "eval_rouge1_for_dialogue_act_recognition": 56.3333, + "eval_rouge1_for_grammar_error_correction": 69.9743, + "eval_rouge1_for_keyword_tagging": 64.8107, + "eval_rouge1_for_overlap_extraction": 35.896, + "eval_rouge1_for_question_rewriting": 72.3172, + "eval_rouge1_for_task020_mctaco_answerability_classification": 58.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 59.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 83.9418, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.3938, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.6819, + "eval_rouge1_for_task039_qasc_overlap_extraction": 38.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.6722, + "eval_rouge1_for_task1152_bard_word_analogy": 37.0, + "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 79.0, + "eval_rouge1_for_task1156_bard_word_analogy": 59.0, + "eval_rouge1_for_task1157_bard_word_analogy": 59.0, + "eval_rouge1_for_task1158_bard_word_analogy": 45.0, + "eval_rouge1_for_task1159_bard_word_analogy": 44.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.5494, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.5244, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.6495, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 62.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.8256, + "eval_rouge1_for_task1344_rte_textual_entailment": 78.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.4692, + "eval_rouge1_for_task1356_xlsum_title_generation": 30.9244, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.0857, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 76.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_rouge1_for_task1407_dart_data_to_text": 40.3509, + "eval_rouge1_for_task1409_dart_data_to_text": 53.5044, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 52.2304, + "eval_rouge1_for_task1439_doqa_answerability_classification": 45.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 38.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 67.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.5606, + "eval_rouge1_for_task1554_scitail_textual_entailment": 74.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.7183, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.871, + "eval_rouge1_for_task1586_scifact_title_generation": 41.2925, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.1417, + "eval_rouge1_for_task1612_sick_textual_entailment": 52.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 85.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.8565, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.6708, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 73.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.7672, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 72.1111, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 67.983, + "eval_rouge1_for_task190_snli_textual_entailment": 18.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 39.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 68.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 25.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 6.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.2437, + "eval_rouge1_for_task220_rocstories_title_generation": 94.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 26.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 40.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 91.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 61.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.7921, + "eval_rouge1_for_task288_gigaword_title_generation": 31.7946, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 7.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 73.4238, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 60.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 62.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 88.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 36.6137, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.2148, + "eval_rouge1_for_task418_persent_title_generation": 34.6078, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.4553, + "eval_rouge1_for_task500_scruples_title_generation": 20.6456, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.2465, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.9213, + "eval_rouge1_for_task602_wikitext_title_generation": 13.4883, + "eval_rouge1_for_task613_liar_keyword_tagging": 38.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 49.5101, + "eval_rouge1_for_task619_ohsumed_title_generation": 51.0387, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.3524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 60.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.3524, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 45.6, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.0206, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.0927, + "eval_rouge1_for_task677_ollie_data_to_text": 37.0245, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rouge1_for_task743_eurlex_title_generation": 41.22, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.6405, + "eval_rouge1_for_task769_qed_title_generation": 70.8829, + "eval_rouge1_for_task827_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 46.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 56.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 73.4333, + "eval_rouge1_for_task892_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.5978, + "eval_rouge1_for_task970_sherliic_textual_entailment": 76.0, + "eval_rouge1_for_textual_entailment": 49.7222, + "eval_rouge1_for_title_generation": 39.9068, + "eval_rouge1_for_word_analogy": 46.9583, + "eval_rougeL": 52.8194, + "eval_rougeL_for_answerability_classification": 59.3077, + "eval_rougeL_for_cause_effect_classification": 68.0266, + "eval_rougeL_for_coreference_resolution": 52.4846, + "eval_rougeL_for_data_to_text": 48.7104, + "eval_rougeL_for_dialogue_act_recognition": 56.3333, + "eval_rougeL_for_grammar_error_correction": 68.6029, + "eval_rougeL_for_keyword_tagging": 64.1378, + "eval_rougeL_for_overlap_extraction": 35.3195, + "eval_rougeL_for_question_rewriting": 68.7334, + "eval_rougeL_for_task020_mctaco_answerability_classification": 58.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 59.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 83.3471, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.7605, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.9844, + "eval_rougeL_for_task039_qasc_overlap_extraction": 38.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.9083, + "eval_rougeL_for_task1152_bard_word_analogy": 37.0, + "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 79.0, + "eval_rougeL_for_task1156_bard_word_analogy": 59.0, + "eval_rougeL_for_task1157_bard_word_analogy": 59.0, + "eval_rougeL_for_task1158_bard_word_analogy": 45.0, + "eval_rougeL_for_task1159_bard_word_analogy": 44.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.3746, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.7093, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.0157, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 62.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.2741, + "eval_rougeL_for_task1344_rte_textual_entailment": 78.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8063, + "eval_rougeL_for_task1356_xlsum_title_generation": 26.9712, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.2255, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 76.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_rougeL_for_task1407_dart_data_to_text": 33.6151, + "eval_rougeL_for_task1409_dart_data_to_text": 44.3596, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.3577, + "eval_rougeL_for_task1439_doqa_answerability_classification": 45.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 38.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 67.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 39.2852, + "eval_rougeL_for_task1554_scitail_textual_entailment": 74.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.8482, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.5206, + "eval_rougeL_for_task1586_scifact_title_generation": 34.7288, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.2206, + "eval_rougeL_for_task1612_sick_textual_entailment": 52.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 85.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.9777, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.9485, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 73.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.8831, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 72.1111, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 59.1469, + "eval_rougeL_for_task190_snli_textual_entailment": 18.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 39.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 68.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 25.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 6.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.2437, + "eval_rougeL_for_task220_rocstories_title_generation": 94.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 59.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 26.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 40.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 91.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 61.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.639, + "eval_rougeL_for_task288_gigaword_title_generation": 28.3471, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 7.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 73.4238, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 60.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 62.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 88.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 35.725, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.7638, + "eval_rougeL_for_task418_persent_title_generation": 30.0644, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.3722, + "eval_rougeL_for_task500_scruples_title_generation": 19.7424, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.3912, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.0119, + "eval_rougeL_for_task602_wikitext_title_generation": 13.4883, + "eval_rougeL_for_task613_liar_keyword_tagging": 38.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 45.1278, + "eval_rougeL_for_task619_ohsumed_title_generation": 44.4998, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.6857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 60.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.3524, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 45.6, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.5268, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.2679, + "eval_rougeL_for_task677_ollie_data_to_text": 30.5695, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 80.0, + "eval_rougeL_for_task743_eurlex_title_generation": 36.5257, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.0995, + "eval_rougeL_for_task769_qed_title_generation": 70.6329, + "eval_rougeL_for_task827_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 46.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 56.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 73.4333, + "eval_rougeL_for_task892_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.7337, + "eval_rougeL_for_task970_sherliic_textual_entailment": 76.0, + "eval_rougeL_for_textual_entailment": 49.7222, + "eval_rougeL_for_title_generation": 36.9132, + "eval_rougeL_for_word_analogy": 46.9583, + "eval_runtime": 881.923, + "eval_samples_per_second": 13.505, + "eval_steps_per_second": 0.845, + "step": 4000 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.9139, + "step": 4500 + }, + { + "epoch": 0.98, + "eval_exact_match": 35.4408, + "eval_exact_match_for_answerability_classification": 61.8462, + "eval_exact_match_for_cause_effect_classification": 50.8571, + "eval_exact_match_for_coreference_resolution": 44.0, + "eval_exact_match_for_data_to_text": 6.4165, + "eval_exact_match_for_dialogue_act_recognition": 52.4286, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 47.6, + "eval_exact_match_for_overlap_extraction": 19.0, + "eval_exact_match_for_question_rewriting": 2.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 61.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 38.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 60.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 43.0, + "eval_exact_match_for_task1153_bard_word_analogy": 28.0, + "eval_exact_match_for_task1154_bard_word_analogy": 24.0, + "eval_exact_match_for_task1155_bard_word_analogy": 80.0, + "eval_exact_match_for_task1156_bard_word_analogy": 57.0, + "eval_exact_match_for_task1157_bard_word_analogy": 63.0, + "eval_exact_match_for_task1158_bard_word_analogy": 37.0, + "eval_exact_match_for_task1159_bard_word_analogy": 45.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 49.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 72.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 1.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 27.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 42.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 82.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 61.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 29.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 69.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 72.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 60.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 60.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 45.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 71.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 15.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 2.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 76.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 16.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 5.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 60.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 97.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 64.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 69.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 74.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 9.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 39.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 82.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 57.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 85.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 58.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 50.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 81.0, + "eval_exact_match_for_textual_entailment": 48.125, + "eval_exact_match_for_title_generation": 10.2018, + "eval_exact_match_for_word_analogy": 47.125, + "eval_f1": 53.0952, + "eval_f1_for_answerability_classification": 64.4103, + "eval_f1_for_cause_effect_classification": 68.5672, + "eval_f1_for_coreference_resolution": 52.2378, + "eval_f1_for_data_to_text": 52.8057, + "eval_f1_for_dialogue_act_recognition": 55.2143, + "eval_f1_for_grammar_error_correction": 60.9798, + "eval_f1_for_keyword_tagging": 59.967, + "eval_f1_for_overlap_extraction": 28.0483, + "eval_f1_for_question_rewriting": 71.2536, + "eval_f1_for_task020_mctaco_answerability_classification": 61.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 57.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 89.8992, + "eval_f1_for_task035_winogrande_question_rewriting": 89.117, + "eval_f1_for_task036_qasc_keyword_tagging": 74.8683, + "eval_f1_for_task039_qasc_overlap_extraction": 40.3333, + "eval_f1_for_task050_multirc_answerability_classification": 60.0, + "eval_f1_for_task102_commongen_data_to_text": 54.3801, + "eval_f1_for_task1152_bard_word_analogy": 43.0, + "eval_f1_for_task1153_bard_word_analogy": 28.6667, + "eval_f1_for_task1154_bard_word_analogy": 24.0, + "eval_f1_for_task1155_bard_word_analogy": 80.0, + "eval_f1_for_task1156_bard_word_analogy": 57.0, + "eval_f1_for_task1157_bard_word_analogy": 63.0, + "eval_f1_for_task1158_bard_word_analogy": 37.0, + "eval_f1_for_task1159_bard_word_analogy": 45.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.1029, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.1693, + "eval_f1_for_task121_atomic_question_rewriting": 51.1662, + "eval_f1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.1994, + "eval_f1_for_task1344_rte_textual_entailment": 72.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.4186, + "eval_f1_for_task1356_xlsum_title_generation": 21.5177, + "eval_f1_for_task1358_xlsum_title_generation": 35.7246, + "eval_f1_for_task1385_anli_textual_entailment": 27.0, + "eval_f1_for_task1386_anli_textual_entailment": 36.0, + "eval_f1_for_task1387_anli_textual_entailment": 42.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 82.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 36.0293, + "eval_f1_for_task1409_dart_data_to_text": 49.8866, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.1227, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 61.0, + "eval_f1_for_task1516_imppres_textual_entailment": 29.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 69.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 34.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 61.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.3917, + "eval_f1_for_task1554_scitail_textual_entailment": 72.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8368, + "eval_f1_for_task1562_zest_question_rewriting": 57.9214, + "eval_f1_for_task1586_scifact_title_generation": 35.8393, + "eval_f1_for_task1598_nyc_data_to_text": 52.8419, + "eval_f1_for_task1612_sick_textual_entailment": 60.0, + "eval_f1_for_task1615_sick_textual_entailment": 51.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.3952, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 60.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.667, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 71.0, + "eval_f1_for_task1659_billsum_title_generation": 36.9264, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 65.2048, + "eval_f1_for_task1728_web_nlg_data_to_text": 62.608, + "eval_f1_for_task190_snli_textual_entailment": 2.0, + "eval_f1_for_task199_multinli_textual_entailment": 45.0, + "eval_f1_for_task200_multinli_textual_entailment": 76.0, + "eval_f1_for_task201_multinli_textual_entailment": 16.0, + "eval_f1_for_task202_multinli_textual_entailment": 5.0, + "eval_f1_for_task219_rocstories_title_generation": 16.0579, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 60.0, + "eval_f1_for_task232_iirc_answerability_classification": 46.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 97.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.9667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 15.7633, + "eval_f1_for_task288_gigaword_title_generation": 30.654, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.3333, + "eval_f1_for_task329_gap_coreference_resolution": 53.0, + "eval_f1_for_task330_gap_coreference_resolution": 68.1905, + "eval_f1_for_task349_squad2.0_answerability_classification": 64.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 89.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 38.7057, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 38.5, + "eval_f1_for_task402_grailqa_question_rewriting": 74.1232, + "eval_f1_for_task418_persent_title_generation": 29.1986, + "eval_f1_for_task442_com_qa_question_rewriting": 71.7495, + "eval_f1_for_task500_scruples_title_generation": 19.1479, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.3867, + "eval_f1_for_task520_aquamuse_answerability_classification": 74.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 36.2578, + "eval_f1_for_task602_wikitext_title_generation": 10.5442, + "eval_f1_for_task613_liar_keyword_tagging": 20.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 39.2647, + "eval_f1_for_task619_ohsumed_title_generation": 43.7335, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.0667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_f1_for_task640_e_snli_textual_entailment": 33.0, + "eval_f1_for_task641_e_snli_textual_entailment": 36.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.5667, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 60.2667, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.8074, + "eval_f1_for_task671_ambigqa_question_rewriting": 70.0232, + "eval_f1_for_task677_ollie_data_to_text": 32.8791, + "eval_f1_for_task738_perspectrum_textual_entailment": 82.0, + "eval_f1_for_task743_eurlex_title_generation": 37.8195, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.2135, + "eval_f1_for_task769_qed_title_generation": 76.171, + "eval_f1_for_task827_copa_cause_effect_classification": 85.0, + "eval_f1_for_task828_copa_cause_effect_classification": 58.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 65.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_f1_for_task890_gwsd_textual_entailment": 50.0, + "eval_f1_for_task891_gap_coreference_resolution": 72.0333, + "eval_f1_for_task892_gap_coreference_resolution": 38.0, + "eval_f1_for_task893_gap_coreference_resolution": 40.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 53.2675, + "eval_f1_for_task970_sherliic_textual_entailment": 81.0, + "eval_f1_for_textual_entailment": 48.125, + "eval_f1_for_title_generation": 36.8826, + "eval_f1_for_word_analogy": 47.2083, + "eval_gen_len": 8.6519, + "eval_global_step": 4500, + "eval_loss": 1.170645833015442, + "eval_rouge1": 54.732, + "eval_rouge1_for_answerability_classification": 64.4103, + "eval_rouge1_for_cause_effect_classification": 69.1455, + "eval_rouge1_for_coreference_resolution": 53.1279, + "eval_rouge1_for_data_to_text": 55.7648, + "eval_rouge1_for_dialogue_act_recognition": 58.8111, + "eval_rouge1_for_grammar_error_correction": 64.7508, + "eval_rouge1_for_keyword_tagging": 64.7441, + "eval_rouge1_for_overlap_extraction": 29.2556, + "eval_rouge1_for_question_rewriting": 72.6453, + "eval_rouge1_for_task020_mctaco_answerability_classification": 61.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 57.7333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.9229, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.668, + "eval_rouge1_for_task036_qasc_keyword_tagging": 80.8683, + "eval_rouge1_for_task039_qasc_overlap_extraction": 42.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 60.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.0559, + "eval_rouge1_for_task1152_bard_word_analogy": 43.0, + "eval_rouge1_for_task1153_bard_word_analogy": 31.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 24.0, + "eval_rouge1_for_task1155_bard_word_analogy": 80.0, + "eval_rouge1_for_task1156_bard_word_analogy": 57.0, + "eval_rouge1_for_task1157_bard_word_analogy": 63.0, + "eval_rouge1_for_task1158_bard_word_analogy": 37.0, + "eval_rouge1_for_task1159_bard_word_analogy": 45.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.684, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.3752, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.1144, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.0332, + "eval_rouge1_for_task1344_rte_textual_entailment": 72.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2658, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.4233, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.4536, + "eval_rouge1_for_task1385_anli_textual_entailment": 27.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 42.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 82.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 66.1778, + "eval_rouge1_for_task1407_dart_data_to_text": 36.862, + "eval_rouge1_for_task1409_dart_data_to_text": 51.6816, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.3592, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 61.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 29.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 69.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 61.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 39.6662, + "eval_rouge1_for_task1554_scitail_textual_entailment": 72.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.1425, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.93, + "eval_rouge1_for_task1586_scifact_title_generation": 39.7907, + "eval_rouge1_for_task1598_nyc_data_to_text": 54.0358, + "eval_rouge1_for_task1612_sick_textual_entailment": 60.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6484, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 60.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.7517, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 71.0, + "eval_rouge1_for_task1659_billsum_title_generation": 38.5111, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 65.1381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 64.4898, + "eval_rouge1_for_task190_snli_textual_entailment": 2.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 76.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 16.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 5.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.6968, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 60.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 97.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.8, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 16.1778, + "eval_rouge1_for_task288_gigaword_title_generation": 34.1247, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 12.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 68.119, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 64.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 89.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 39.0814, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 49.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.8757, + "eval_rouge1_for_task418_persent_title_generation": 33.4913, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.2211, + "eval_rouge1_for_task500_scruples_title_generation": 21.7465, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.7628, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 74.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 38.0459, + "eval_rouge1_for_task602_wikitext_title_generation": 11.6065, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 42.9371, + "eval_rouge1_for_task619_ohsumed_title_generation": 47.2257, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.7857, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.0667, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 60.8, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.4919, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 70.5854, + "eval_rouge1_for_task677_ollie_data_to_text": 35.2316, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 82.0, + "eval_rouge1_for_task743_eurlex_title_generation": 40.3802, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.8676, + "eval_rouge1_for_task769_qed_title_generation": 76.1912, + "eval_rouge1_for_task827_copa_cause_effect_classification": 85.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 58.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 65.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 50.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 72.3667, + "eval_rouge1_for_task892_gap_coreference_resolution": 38.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.7235, + "eval_rouge1_for_task970_sherliic_textual_entailment": 81.0, + "eval_rouge1_for_textual_entailment": 49.4861, + "eval_rouge1_for_title_generation": 39.3485, + "eval_rouge1_for_word_analogy": 47.5833, + "eval_rougeL": 53.33, + "eval_rougeL_for_answerability_classification": 64.4103, + "eval_rougeL_for_cause_effect_classification": 68.4619, + "eval_rougeL_for_coreference_resolution": 53.1279, + "eval_rougeL_for_data_to_text": 47.2364, + "eval_rougeL_for_dialogue_act_recognition": 58.8111, + "eval_rougeL_for_grammar_error_correction": 63.864, + "eval_rougeL_for_keyword_tagging": 64.319, + "eval_rougeL_for_overlap_extraction": 29.0105, + "eval_rougeL_for_question_rewriting": 69.3045, + "eval_rougeL_for_task020_mctaco_answerability_classification": 61.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 57.7333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.8604, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.9586, + "eval_rougeL_for_task036_qasc_keyword_tagging": 79.6429, + "eval_rougeL_for_task039_qasc_overlap_extraction": 42.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 60.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.2125, + "eval_rougeL_for_task1152_bard_word_analogy": 43.0, + "eval_rougeL_for_task1153_bard_word_analogy": 31.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 24.0, + "eval_rougeL_for_task1155_bard_word_analogy": 80.0, + "eval_rougeL_for_task1156_bard_word_analogy": 57.0, + "eval_rougeL_for_task1157_bard_word_analogy": 63.0, + "eval_rougeL_for_task1158_bard_word_analogy": 37.0, + "eval_rougeL_for_task1159_bard_word_analogy": 45.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.7687, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9722, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.8925, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.5755, + "eval_rougeL_for_task1344_rte_textual_entailment": 72.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.587, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.8014, + "eval_rougeL_for_task1358_xlsum_title_generation": 34.2347, + "eval_rougeL_for_task1385_anli_textual_entailment": 27.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 42.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 82.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 66.1778, + "eval_rougeL_for_task1407_dart_data_to_text": 30.6751, + "eval_rougeL_for_task1409_dart_data_to_text": 41.8184, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.5017, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 61.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 29.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 69.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 61.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.2102, + "eval_rougeL_for_task1554_scitail_textual_entailment": 72.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2263, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.7828, + "eval_rougeL_for_task1586_scifact_title_generation": 33.5899, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.8662, + "eval_rougeL_for_task1612_sick_textual_entailment": 60.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.7271, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 60.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.8138, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 71.0, + "eval_rougeL_for_task1659_billsum_title_generation": 33.3103, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 65.1381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.3108, + "eval_rougeL_for_task190_snli_textual_entailment": 2.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 76.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 16.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 5.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.6968, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 60.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 97.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.8, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 15.6876, + "eval_rougeL_for_task288_gigaword_title_generation": 29.7749, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 12.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 68.119, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 64.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 89.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 37.5247, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 49.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.6517, + "eval_rougeL_for_task418_persent_title_generation": 29.5421, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2473, + "eval_rougeL_for_task500_scruples_title_generation": 20.8426, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.5385, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 74.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.5903, + "eval_rougeL_for_task602_wikitext_title_generation": 11.6065, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 39.7086, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.901, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.8857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.0667, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 60.8, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.8076, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.8619, + "eval_rougeL_for_task677_ollie_data_to_text": 29.0584, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 82.0, + "eval_rougeL_for_task743_eurlex_title_generation": 35.0455, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.1767, + "eval_rougeL_for_task769_qed_title_generation": 76.1912, + "eval_rougeL_for_task827_copa_cause_effect_classification": 85.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 58.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 65.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 50.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 72.3667, + "eval_rougeL_for_task892_gap_coreference_resolution": 38.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.8114, + "eval_rougeL_for_task970_sherliic_textual_entailment": 81.0, + "eval_rougeL_for_textual_entailment": 49.4861, + "eval_rougeL_for_title_generation": 36.5114, + "eval_rougeL_for_word_analogy": 47.5833, + "eval_runtime": 842.7447, + "eval_samples_per_second": 14.132, + "eval_steps_per_second": 0.884, + "step": 4500 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.7351, + "step": 5000 + }, + { + "epoch": 1.09, + "eval_exact_match": 34.2653, + "eval_exact_match_for_answerability_classification": 56.7692, + "eval_exact_match_for_cause_effect_classification": 53.0, + "eval_exact_match_for_coreference_resolution": 43.2143, + "eval_exact_match_for_data_to_text": 5.4479, + "eval_exact_match_for_dialogue_act_recognition": 49.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 45.6, + "eval_exact_match_for_overlap_extraction": 16.5, + "eval_exact_match_for_question_rewriting": 2.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 17.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 33.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 41.0, + "eval_exact_match_for_task1153_bard_word_analogy": 24.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 76.0, + "eval_exact_match_for_task1156_bard_word_analogy": 66.0, + "eval_exact_match_for_task1157_bard_word_analogy": 54.0, + "eval_exact_match_for_task1158_bard_word_analogy": 44.0, + "eval_exact_match_for_task1159_bard_word_analogy": 37.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 1.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 65.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 71.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 56.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 48.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 68.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 75.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 64.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 52.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 36.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 24.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 6.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 86.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 3.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 90.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 50.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 58.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 70.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 66.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 1.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 7.1429, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 26.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 24.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 59.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 87.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 63.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 54.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 76.0, + "eval_exact_match_for_textual_entailment": 46.7917, + "eval_exact_match_for_title_generation": 10.5381, + "eval_exact_match_for_word_analogy": 45.625, + "eval_f1": 52.1985, + "eval_f1_for_answerability_classification": 59.3333, + "eval_f1_for_cause_effect_classification": 69.4151, + "eval_f1_for_coreference_resolution": 50.5712, + "eval_f1_for_data_to_text": 53.5066, + "eval_f1_for_dialogue_act_recognition": 52.2143, + "eval_f1_for_grammar_error_correction": 65.7346, + "eval_f1_for_keyword_tagging": 60.7919, + "eval_f1_for_overlap_extraction": 42.1238, + "eval_f1_for_question_rewriting": 69.8447, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 59.0, + "eval_f1_for_task034_winogrande_question_rewriting": 89.381, + "eval_f1_for_task035_winogrande_question_rewriting": 85.2004, + "eval_f1_for_task036_qasc_keyword_tagging": 63.7738, + "eval_f1_for_task039_qasc_overlap_extraction": 43.7333, + "eval_f1_for_task050_multirc_answerability_classification": 61.0, + "eval_f1_for_task102_commongen_data_to_text": 55.6546, + "eval_f1_for_task1152_bard_word_analogy": 41.0, + "eval_f1_for_task1153_bard_word_analogy": 26.0, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 76.0, + "eval_f1_for_task1156_bard_word_analogy": 66.0, + "eval_f1_for_task1157_bard_word_analogy": 54.0, + "eval_f1_for_task1158_bard_word_analogy": 44.0, + "eval_f1_for_task1159_bard_word_analogy": 37.0, + "eval_f1_for_task1161_coda_19_title_generation": 37.8875, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.9353, + "eval_f1_for_task121_atomic_question_rewriting": 49.1156, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0557, + "eval_f1_for_task1344_rte_textual_entailment": 65.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.3423, + "eval_f1_for_task1356_xlsum_title_generation": 23.0792, + "eval_f1_for_task1358_xlsum_title_generation": 35.7076, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 36.0, + "eval_f1_for_task1387_anli_textual_entailment": 36.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 71.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 53.0, + "eval_f1_for_task1407_dart_data_to_text": 36.3418, + "eval_f1_for_task1409_dart_data_to_text": 54.3137, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 47.228, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 56.0, + "eval_f1_for_task1516_imppres_textual_entailment": 48.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 68.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 24.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.8313, + "eval_f1_for_task1554_scitail_textual_entailment": 75.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.2412, + "eval_f1_for_task1562_zest_question_rewriting": 55.4933, + "eval_f1_for_task1586_scifact_title_generation": 37.0905, + "eval_f1_for_task1598_nyc_data_to_text": 51.2505, + "eval_f1_for_task1612_sick_textual_entailment": 64.0, + "eval_f1_for_task1615_sick_textual_entailment": 52.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.6784, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 58.0, + "eval_f1_for_task1631_open_pi_data_to_text": 92.4024, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_f1_for_task1659_billsum_title_generation": 35.7735, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.2714, + "eval_f1_for_task1728_web_nlg_data_to_text": 65.2996, + "eval_f1_for_task190_snli_textual_entailment": 6.0, + "eval_f1_for_task199_multinli_textual_entailment": 35.0, + "eval_f1_for_task200_multinli_textual_entailment": 86.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 3.0, + "eval_f1_for_task219_rocstories_title_generation": 17.7603, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_f1_for_task232_iirc_answerability_classification": 45.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 90.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 61.4667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 40.5142, + "eval_f1_for_task288_gigaword_title_generation": 30.4715, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.6667, + "eval_f1_for_task329_gap_coreference_resolution": 55.0, + "eval_f1_for_task330_gap_coreference_resolution": 69.2905, + "eval_f1_for_task349_squad2.0_answerability_classification": 58.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 88.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.8146, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_f1_for_task402_grailqa_question_rewriting": 69.1071, + "eval_f1_for_task418_persent_title_generation": 26.5494, + "eval_f1_for_task442_com_qa_question_rewriting": 72.568, + "eval_f1_for_task500_scruples_title_generation": 18.9698, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9514, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 37.0577, + "eval_f1_for_task602_wikitext_title_generation": 13.7993, + "eval_f1_for_task613_liar_keyword_tagging": 23.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 39.4245, + "eval_f1_for_task619_ohsumed_title_generation": 43.9097, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.0, + "eval_f1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_f1_for_task640_e_snli_textual_entailment": 26.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.8524, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 38.1571, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.2949, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.1749, + "eval_f1_for_task677_ollie_data_to_text": 33.4538, + "eval_f1_for_task738_perspectrum_textual_entailment": 59.0, + "eval_f1_for_task743_eurlex_title_generation": 37.2216, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.9209, + "eval_f1_for_task769_qed_title_generation": 81.2636, + "eval_f1_for_task827_copa_cause_effect_classification": 87.0, + "eval_f1_for_task828_copa_cause_effect_classification": 63.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_f1_for_task890_gwsd_textual_entailment": 54.0, + "eval_f1_for_task891_gap_coreference_resolution": 70.1444, + "eval_f1_for_task892_gap_coreference_resolution": 46.0, + "eval_f1_for_task893_gap_coreference_resolution": 30.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 50.9289, + "eval_f1_for_task970_sherliic_textual_entailment": 76.0, + "eval_f1_for_textual_entailment": 46.7917, + "eval_f1_for_title_generation": 37.3415, + "eval_f1_for_word_analogy": 45.875, + "eval_gen_len": 9.1441, + "eval_global_step": 5000, + "eval_loss": 1.2105255126953125, + "eval_rouge1": 53.9861, + "eval_rouge1_for_answerability_classification": 59.3333, + "eval_rouge1_for_cause_effect_classification": 69.9713, + "eval_rouge1_for_coreference_resolution": 51.3058, + "eval_rouge1_for_data_to_text": 56.2535, + "eval_rouge1_for_dialogue_act_recognition": 55.1762, + "eval_rouge1_for_grammar_error_correction": 68.7265, + "eval_rouge1_for_keyword_tagging": 65.8694, + "eval_rouge1_for_overlap_extraction": 44.0883, + "eval_rouge1_for_question_rewriting": 71.3393, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 59.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 89.4111, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.1593, + "eval_rouge1_for_task036_qasc_keyword_tagging": 68.1944, + "eval_rouge1_for_task039_qasc_overlap_extraction": 46.7333, + "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.8695, + "eval_rouge1_for_task1152_bard_word_analogy": 41.0, + "eval_rouge1_for_task1153_bard_word_analogy": 27.0, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 76.0, + "eval_rouge1_for_task1156_bard_word_analogy": 66.0, + "eval_rouge1_for_task1157_bard_word_analogy": 54.0, + "eval_rouge1_for_task1158_bard_word_analogy": 44.0, + "eval_rouge1_for_task1159_bard_word_analogy": 37.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.2537, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.1496, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.3263, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.7758, + "eval_rouge1_for_task1344_rte_textual_entailment": 65.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.7417, + "eval_rouge1_for_task1356_xlsum_title_generation": 27.5042, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.3042, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 71.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 60.4, + "eval_rouge1_for_task1407_dart_data_to_text": 37.3496, + "eval_rouge1_for_task1409_dart_data_to_text": 56.1133, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.264, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 56.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 48.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 68.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 24.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.4522, + "eval_rouge1_for_task1554_scitail_textual_entailment": 75.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.189, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.8552, + "eval_rouge1_for_task1586_scifact_title_generation": 41.8511, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.109, + "eval_rouge1_for_task1612_sick_textual_entailment": 64.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 84.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.9428, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 92.4991, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rouge1_for_task1659_billsum_title_generation": 37.4158, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.2714, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 66.8716, + "eval_rouge1_for_task190_snli_textual_entailment": 6.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 86.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 3.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.3167, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 90.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 61.6333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.4433, + "eval_rouge1_for_task288_gigaword_title_generation": 33.9372, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 69.0524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 58.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 88.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.6514, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.5863, + "eval_rouge1_for_task418_persent_title_generation": 29.4632, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.8879, + "eval_rouge1_for_task500_scruples_title_generation": 20.5621, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.4146, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 38.9368, + "eval_rouge1_for_task602_wikitext_title_generation": 14.7761, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 42.4808, + "eval_rouge1_for_task619_ohsumed_title_generation": 47.7419, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 26.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.3524, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 38.4238, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.9611, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.7109, + "eval_rouge1_for_task677_ollie_data_to_text": 36.2977, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 82.0, + "eval_rouge1_for_task743_eurlex_title_generation": 39.1357, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 9.5922, + "eval_rouge1_for_task769_qed_title_generation": 81.2636, + "eval_rouge1_for_task827_copa_cause_effect_classification": 87.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 63.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 54.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 70.2333, + "eval_rouge1_for_task892_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 53.0504, + "eval_rouge1_for_task970_sherliic_textual_entailment": 76.0, + "eval_rouge1_for_textual_entailment": 49.25, + "eval_rouge1_for_title_generation": 39.7837, + "eval_rouge1_for_word_analogy": 46.0, + "eval_rougeL": 52.5565, + "eval_rougeL_for_answerability_classification": 59.3333, + "eval_rougeL_for_cause_effect_classification": 69.4163, + "eval_rougeL_for_coreference_resolution": 51.3058, + "eval_rougeL_for_data_to_text": 48.1077, + "eval_rougeL_for_dialogue_act_recognition": 55.1762, + "eval_rougeL_for_grammar_error_correction": 67.4079, + "eval_rougeL_for_keyword_tagging": 65.1668, + "eval_rougeL_for_overlap_extraction": 43.5603, + "eval_rougeL_for_question_rewriting": 67.9477, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 59.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.6901, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.091, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.8317, + "eval_rougeL_for_task039_qasc_overlap_extraction": 46.7333, + "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, + "eval_rougeL_for_task102_commongen_data_to_text": 59.7823, + "eval_rougeL_for_task1152_bard_word_analogy": 41.0, + "eval_rougeL_for_task1153_bard_word_analogy": 27.0, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 76.0, + "eval_rougeL_for_task1156_bard_word_analogy": 66.0, + "eval_rougeL_for_task1157_bard_word_analogy": 54.0, + "eval_rougeL_for_task1158_bard_word_analogy": 44.0, + "eval_rougeL_for_task1159_bard_word_analogy": 37.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.9912, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.5887, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.1966, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.1922, + "eval_rougeL_for_task1344_rte_textual_entailment": 65.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.6549, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.5924, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.6949, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 71.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 60.4, + "eval_rougeL_for_task1407_dart_data_to_text": 32.4564, + "eval_rougeL_for_task1409_dart_data_to_text": 45.5556, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 48.5431, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 56.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 48.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 68.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 24.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.9967, + "eval_rougeL_for_task1554_scitail_textual_entailment": 75.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2728, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.7795, + "eval_rougeL_for_task1586_scifact_title_generation": 34.8948, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.4507, + "eval_rougeL_for_task1612_sick_textual_entailment": 64.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 84.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.7105, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 89.1569, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rougeL_for_task1659_billsum_title_generation": 32.4756, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.2714, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 58.3923, + "eval_rougeL_for_task190_snli_textual_entailment": 6.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 86.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 3.0, + "eval_rougeL_for_task219_rocstories_title_generation": 22.3167, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 90.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 61.6333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 40.3873, + "eval_rougeL_for_task288_gigaword_title_generation": 29.2154, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 69.0524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 58.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 88.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.7189, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.8418, + "eval_rougeL_for_task418_persent_title_generation": 26.507, + "eval_rougeL_for_task442_com_qa_question_rewriting": 71.6748, + "eval_rougeL_for_task500_scruples_title_generation": 19.252, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.0527, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.2973, + "eval_rougeL_for_task602_wikitext_title_generation": 14.5381, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 39.5286, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.0256, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.9833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 26.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.3524, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 38.4238, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.2668, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.9301, + "eval_rougeL_for_task677_ollie_data_to_text": 29.6588, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 82.0, + "eval_rougeL_for_task743_eurlex_title_generation": 34.1681, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 8.1916, + "eval_rougeL_for_task769_qed_title_generation": 81.2636, + "eval_rougeL_for_task827_copa_cause_effect_classification": 87.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 63.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 54.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 70.2333, + "eval_rougeL_for_task892_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.7866, + "eval_rougeL_for_task970_sherliic_textual_entailment": 76.0, + "eval_rougeL_for_textual_entailment": 49.25, + "eval_rougeL_for_title_generation": 36.7236, + "eval_rougeL_for_word_analogy": 46.0, + "eval_runtime": 853.9076, + "eval_samples_per_second": 13.948, + "eval_steps_per_second": 0.872, + "step": 5000 + }, + { + "epoch": 1.09, + "step": 5000, + "total_flos": 7.677839450340065e+17, + "train_loss": 0.9811246063232422, + "train_runtime": 36938.4245, + "train_samples_per_second": 2.166, + "train_steps_per_second": 0.135 + } + ], + "max_steps": 5000, + "num_train_epochs": 2, + "total_flos": 7.677839450340065e+17, + "trial_name": null, + "trial_params": null +}