diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7697 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0926472194908774, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.9912, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 1.9983, + "eval_exact_match_for_answerability_classification": 5.7692, + "eval_exact_match_for_cause_effect_classification": 3.0, + "eval_exact_match_for_coreference_resolution": 2.5, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 1.0, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 0.4, + "eval_exact_match_for_overlap_extraction": 0.0, + "eval_exact_match_for_question_rewriting": 0.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 25.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 1.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 2.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 0.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 0.0, + "eval_exact_match_for_task102_commongen_data_to_text": 1.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 0.0, + "eval_exact_match_for_task1154_bard_word_analogy": 5.0, + "eval_exact_match_for_task1155_bard_word_analogy": 5.0, + "eval_exact_match_for_task1156_bard_word_analogy": 3.0, + "eval_exact_match_for_task1157_bard_word_analogy": 4.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 0.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 23.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 0.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 0.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 14.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 2.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 0.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 1.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 0.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 1.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 8.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 41.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 0.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 0.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 0.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 21.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 0.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 1.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 2.0833, + "eval_exact_match_for_title_generation": 0.3363, + "eval_exact_match_for_word_analogy": 2.125, + "eval_f1": 16.8233, + "eval_f1_for_answerability_classification": 11.6646, + "eval_f1_for_cause_effect_classification": 11.5915, + "eval_f1_for_coreference_resolution": 12.0751, + "eval_f1_for_data_to_text": 29.8471, + "eval_f1_for_dialogue_act_recognition": 4.221, + "eval_f1_for_grammar_error_correction": 54.4276, + "eval_f1_for_keyword_tagging": 19.1076, + "eval_f1_for_overlap_extraction": 20.3979, + "eval_f1_for_question_rewriting": 48.0248, + "eval_f1_for_task020_mctaco_answerability_classification": 29.1888, + "eval_f1_for_task033_winogrande_coreference_resolution": 5.6775, + "eval_f1_for_task034_winogrande_question_rewriting": 5.0222, + "eval_f1_for_task035_winogrande_question_rewriting": 9.1107, + "eval_f1_for_task036_qasc_keyword_tagging": 43.4226, + "eval_f1_for_task039_qasc_overlap_extraction": 17.7689, + "eval_f1_for_task050_multirc_answerability_classification": 11.5557, + "eval_f1_for_task102_commongen_data_to_text": 29.283, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 0.0, + "eval_f1_for_task1154_bard_word_analogy": 8.8333, + "eval_f1_for_task1155_bard_word_analogy": 27.6333, + "eval_f1_for_task1156_bard_word_analogy": 16.3333, + "eval_f1_for_task1157_bard_word_analogy": 7.5, + "eval_f1_for_task1158_bard_word_analogy": 3.6667, + "eval_f1_for_task1159_bard_word_analogy": 4.0, + "eval_f1_for_task1161_coda_19_title_generation": 19.0595, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 68.4367, + "eval_f1_for_task121_atomic_question_rewriting": 50.3432, + "eval_f1_for_task133_winowhy_coreference_resolution": 31.9231, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.2891, + "eval_f1_for_task1344_rte_textual_entailment": 10.3306, + "eval_f1_for_task1345_qqp_question_rewriting": 35.2513, + "eval_f1_for_task1356_xlsum_title_generation": 7.5514, + "eval_f1_for_task1358_xlsum_title_generation": 27.9663, + "eval_f1_for_task1385_anli_textual_entailment": 4.574, + "eval_f1_for_task1386_anli_textual_entailment": 4.3745, + "eval_f1_for_task1387_anli_textual_entailment": 2.8872, + "eval_f1_for_task1388_cb_textual_entailment": 4.0459, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 11.7231, + "eval_f1_for_task1393_copa_cause_effect_classification": 8.0383, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 0.3852, + "eval_f1_for_task1407_dart_data_to_text": 34.6254, + "eval_f1_for_task1409_dart_data_to_text": 37.4756, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.0759, + "eval_f1_for_task1439_doqa_answerability_classification": 1.0779, + "eval_f1_for_task1442_doqa_answerability_classification": 1.0852, + "eval_f1_for_task1516_imppres_textual_entailment": 0.25, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 19.6444, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 1.5738, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 3.5316, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 2.6891, + "eval_f1_for_task1540_peer_read_title_generation": 12.5072, + "eval_f1_for_task1554_scitail_textual_entailment": 15.5882, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 79.7793, + "eval_f1_for_task1562_zest_question_rewriting": 56.5651, + "eval_f1_for_task1586_scifact_title_generation": 19.6496, + "eval_f1_for_task1598_nyc_data_to_text": 14.4803, + "eval_f1_for_task1612_sick_textual_entailment": 28.5714, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 69.7476, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 1.2288, + "eval_f1_for_task1631_open_pi_data_to_text": 39.9253, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 0.5818, + "eval_f1_for_task1659_billsum_title_generation": 24.9231, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 33.5072, + "eval_f1_for_task1728_web_nlg_data_to_text": 28.0827, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 0.669, + "eval_f1_for_task200_multinli_textual_entailment": 4.8151, + "eval_f1_for_task201_multinli_textual_entailment": 18.9612, + "eval_f1_for_task202_multinli_textual_entailment": 10.7721, + "eval_f1_for_task219_rocstories_title_generation": 8.2957, + "eval_f1_for_task220_rocstories_title_generation": 28.0007, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 18.1857, + "eval_f1_for_task232_iirc_answerability_classification": 10.0837, + "eval_f1_for_task233_iirc_answerability_classification": 10.0276, + "eval_f1_for_task242_tweetqa_answerability_classification": 11.0979, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 39.6497, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 23.0268, + "eval_f1_for_task288_gigaword_title_generation": 18.5248, + "eval_f1_for_task290_tellmewhy_answerability_classification": 1.0409, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.4376, + "eval_f1_for_task329_gap_coreference_resolution": 15.5188, + "eval_f1_for_task330_gap_coreference_resolution": 1.2691, + "eval_f1_for_task349_squad2.0_answerability_classification": 14.2033, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 7.7146, + "eval_f1_for_task391_cod3s_cause_effect_classification": 0.2, + "eval_f1_for_task392_cod3s_cause_effect_classification": 0.6505, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.0665, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.3508, + "eval_f1_for_task402_grailqa_question_rewriting": 36.0823, + "eval_f1_for_task418_persent_title_generation": 9.1383, + "eval_f1_for_task442_com_qa_question_rewriting": 54.079, + "eval_f1_for_task500_scruples_title_generation": 8.2607, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 15.8456, + "eval_f1_for_task520_aquamuse_answerability_classification": 42.2822, + "eval_f1_for_task569_recipe_nlg_title_generation": 29.1657, + "eval_f1_for_task602_wikitext_title_generation": 8.1403, + "eval_f1_for_task613_liar_keyword_tagging": 3.471, + "eval_f1_for_task614_glucose_cause_effect_classification": 19.175, + "eval_f1_for_task619_ohsumed_title_generation": 24.933, + "eval_f1_for_task620_ohsumed_keyword_tagging": 17.9686, + "eval_f1_for_task623_ohsumed_keyword_tagging": 7.766, + "eval_f1_for_task640_e_snli_textual_entailment": 0.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 0.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 22.9098, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 18.5967, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.0136, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.6214, + "eval_f1_for_task677_ollie_data_to_text": 35.7262, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0513, + "eval_f1_for_task743_eurlex_title_generation": 19.2596, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.5385, + "eval_f1_for_task769_qed_title_generation": 7.0911, + "eval_f1_for_task827_copa_cause_effect_classification": 21.5079, + "eval_f1_for_task828_copa_cause_effect_classification": 0.5023, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.4484, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 7.2044, + "eval_f1_for_task890_gwsd_textual_entailment": 2.1667, + "eval_f1_for_task891_gap_coreference_resolution": 3.4598, + "eval_f1_for_task892_gap_coreference_resolution": 2.8086, + "eval_f1_for_task893_gap_coreference_resolution": 2.1289, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 11.8, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 4.9152, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 9.5944, + "eval_f1_for_task957_e2e_data_to_text": 24.7183, + "eval_f1_for_task970_sherliic_textual_entailment": 0.0, + "eval_f1_for_textual_entailment": 7.7921, + "eval_f1_for_title_generation": 16.6648, + "eval_f1_for_word_analogy": 8.4958, + "eval_gen_len": 45.2844, + "eval_global_step": 1, + "eval_loss": 5.956121444702148, + "eval_rouge1": 18.595, + "eval_rouge1_for_answerability_classification": 11.6321, + "eval_rouge1_for_cause_effect_classification": 14.167, + "eval_rouge1_for_coreference_resolution": 12.2122, + "eval_rouge1_for_data_to_text": 36.3993, + "eval_rouge1_for_dialogue_act_recognition": 4.942, + "eval_rouge1_for_grammar_error_correction": 59.1266, + "eval_rouge1_for_keyword_tagging": 21.3722, + "eval_rouge1_for_overlap_extraction": 21.1776, + "eval_rouge1_for_question_rewriting": 49.5633, + "eval_rouge1_for_task020_mctaco_answerability_classification": 29.1152, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 6.5624, + "eval_rouge1_for_task034_winogrande_question_rewriting": 5.0202, + "eval_rouge1_for_task035_winogrande_question_rewriting": 9.1749, + "eval_rouge1_for_task036_qasc_keyword_tagging": 45.2664, + "eval_rouge1_for_task039_qasc_overlap_extraction": 18.7849, + "eval_rouge1_for_task050_multirc_answerability_classification": 11.5202, + "eval_rouge1_for_task102_commongen_data_to_text": 44.5225, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 0.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.8333, + "eval_rouge1_for_task1155_bard_word_analogy": 27.6333, + "eval_rouge1_for_task1156_bard_word_analogy": 16.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 7.5, + "eval_rouge1_for_task1158_bard_word_analogy": 3.6667, + "eval_rouge1_for_task1159_bard_word_analogy": 4.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 21.2925, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 69.425, + "eval_rouge1_for_task121_atomic_question_rewriting": 52.3741, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 31.9231, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.5397, + "eval_rouge1_for_task1344_rte_textual_entailment": 10.3099, + "eval_rouge1_for_task1345_qqp_question_rewriting": 38.3773, + "eval_rouge1_for_task1356_xlsum_title_generation": 8.8288, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.1898, + "eval_rouge1_for_task1385_anli_textual_entailment": 4.6917, + "eval_rouge1_for_task1386_anli_textual_entailment": 4.5127, + "eval_rouge1_for_task1387_anli_textual_entailment": 3.9198, + "eval_rouge1_for_task1388_cb_textual_entailment": 3.9937, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 11.7164, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 7.987, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.746, + "eval_rouge1_for_task1407_dart_data_to_text": 39.8241, + "eval_rouge1_for_task1409_dart_data_to_text": 39.2407, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.405, + "eval_rouge1_for_task1439_doqa_answerability_classification": 1.0596, + "eval_rouge1_for_task1442_doqa_answerability_classification": 1.0706, + "eval_rouge1_for_task1516_imppres_textual_entailment": 5.4158, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 19.6444, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 1.5553, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 3.5342, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 2.7169, + "eval_rouge1_for_task1540_peer_read_title_generation": 14.196, + "eval_rouge1_for_task1554_scitail_textual_entailment": 15.5882, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 82.8483, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.1382, + "eval_rouge1_for_task1586_scifact_title_generation": 21.8163, + "eval_rouge1_for_task1598_nyc_data_to_text": 16.9397, + "eval_rouge1_for_task1612_sick_textual_entailment": 28.5714, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.1081, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 71.1411, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 1.2168, + "eval_rouge1_for_task1631_open_pi_data_to_text": 40.5546, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 0.5818, + "eval_rouge1_for_task1659_billsum_title_generation": 26.341, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 33.5072, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 53.1791, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 0.669, + "eval_rouge1_for_task200_multinli_textual_entailment": 4.7158, + "eval_rouge1_for_task201_multinli_textual_entailment": 18.9188, + "eval_rouge1_for_task202_multinli_textual_entailment": 10.6517, + "eval_rouge1_for_task219_rocstories_title_generation": 9.9728, + "eval_rouge1_for_task220_rocstories_title_generation": 27.6468, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 18.1747, + "eval_rouge1_for_task232_iirc_answerability_classification": 9.9888, + "eval_rouge1_for_task233_iirc_answerability_classification": 9.9742, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 11.0912, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 39.7398, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 23.5703, + "eval_rouge1_for_task288_gigaword_title_generation": 21.0021, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 1.0409, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.093, + "eval_rouge1_for_task329_gap_coreference_resolution": 15.4782, + "eval_rouge1_for_task330_gap_coreference_resolution": 1.2683, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 14.1005, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 7.2105, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 0.2, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 0.6505, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.6123, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.3361, + "eval_rouge1_for_task402_grailqa_question_rewriting": 36.9535, + "eval_rouge1_for_task418_persent_title_generation": 10.5099, + "eval_rouge1_for_task442_com_qa_question_rewriting": 57.9885, + "eval_rouge1_for_task500_scruples_title_generation": 9.1556, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 16.0598, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 42.2822, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 30.3458, + "eval_rouge1_for_task602_wikitext_title_generation": 8.5748, + "eval_rouge1_for_task613_liar_keyword_tagging": 6.2797, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.1342, + "eval_rouge1_for_task619_ohsumed_title_generation": 26.7724, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 21.342, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 9.9764, + "eval_rouge1_for_task640_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 23.9965, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 18.7289, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.8466, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.7574, + "eval_rouge1_for_task677_ollie_data_to_text": 39.0162, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 14.5742, + "eval_rouge1_for_task743_eurlex_title_generation": 20.7768, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 9.3779, + "eval_rouge1_for_task769_qed_title_generation": 7.1119, + "eval_rouge1_for_task827_copa_cause_effect_classification": 29.1087, + "eval_rouge1_for_task828_copa_cause_effect_classification": 0.4762, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.3096, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 9.5214, + "eval_rouge1_for_task890_gwsd_textual_entailment": 2.1667, + "eval_rouge1_for_task891_gap_coreference_resolution": 3.7114, + "eval_rouge1_for_task892_gap_coreference_resolution": 2.7914, + "eval_rouge1_for_task893_gap_coreference_resolution": 2.1142, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 12.0667, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 8.5513, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 9.5944, + "eval_rouge1_for_task957_e2e_data_to_text": 24.943, + "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, + "eval_rouge1_for_textual_entailment": 10.6527, + "eval_rouge1_for_title_generation": 18.0359, + "eval_rouge1_for_word_analogy": 8.4958, + "eval_rougeL": 17.3623, + "eval_rougeL_for_answerability_classification": 11.6321, + "eval_rougeL_for_cause_effect_classification": 13.6524, + "eval_rougeL_for_coreference_resolution": 12.1839, + "eval_rougeL_for_data_to_text": 30.5828, + "eval_rougeL_for_dialogue_act_recognition": 4.9361, + "eval_rougeL_for_grammar_error_correction": 58.391, + "eval_rougeL_for_keyword_tagging": 20.208, + "eval_rougeL_for_overlap_extraction": 20.9115, + "eval_rougeL_for_question_rewriting": 45.8316, + "eval_rougeL_for_task020_mctaco_answerability_classification": 29.1152, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 6.5624, + "eval_rougeL_for_task034_winogrande_question_rewriting": 5.0202, + "eval_rougeL_for_task035_winogrande_question_rewriting": 9.0916, + "eval_rougeL_for_task036_qasc_keyword_tagging": 42.6331, + "eval_rougeL_for_task039_qasc_overlap_extraction": 18.7849, + "eval_rougeL_for_task050_multirc_answerability_classification": 11.5202, + "eval_rougeL_for_task102_commongen_data_to_text": 38.1874, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 0.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.8333, + "eval_rougeL_for_task1155_bard_word_analogy": 27.6333, + "eval_rougeL_for_task1156_bard_word_analogy": 16.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 7.5, + "eval_rougeL_for_task1158_bard_word_analogy": 3.6667, + "eval_rougeL_for_task1159_bard_word_analogy": 4.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 15.4335, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 65.784, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.8331, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 31.9231, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.8592, + "eval_rougeL_for_task1344_rte_textual_entailment": 10.3099, + "eval_rougeL_for_task1345_qqp_question_rewriting": 35.1591, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.101, + "eval_rougeL_for_task1358_xlsum_title_generation": 25.9612, + "eval_rougeL_for_task1385_anli_textual_entailment": 4.6917, + "eval_rougeL_for_task1386_anli_textual_entailment": 4.5127, + "eval_rougeL_for_task1387_anli_textual_entailment": 3.9198, + "eval_rougeL_for_task1388_cb_textual_entailment": 3.9937, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 11.7164, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 7.987, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.7052, + "eval_rougeL_for_task1407_dart_data_to_text": 28.8718, + "eval_rougeL_for_task1409_dart_data_to_text": 32.6625, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.865, + "eval_rougeL_for_task1439_doqa_answerability_classification": 1.0596, + "eval_rougeL_for_task1442_doqa_answerability_classification": 1.0706, + "eval_rougeL_for_task1516_imppres_textual_entailment": 5.4158, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 19.6444, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 1.5553, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 3.5342, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 2.7169, + "eval_rougeL_for_task1540_peer_read_title_generation": 11.7745, + "eval_rougeL_for_task1554_scitail_textual_entailment": 15.5882, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 81.9171, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.675, + "eval_rougeL_for_task1586_scifact_title_generation": 16.7068, + "eval_rougeL_for_task1598_nyc_data_to_text": 13.8297, + "eval_rougeL_for_task1612_sick_textual_entailment": 28.5714, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.1081, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 68.5045, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 1.2168, + "eval_rougeL_for_task1631_open_pi_data_to_text": 37.2836, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 0.5818, + "eval_rougeL_for_task1659_billsum_title_generation": 22.0517, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 33.3533, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 45.4075, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 0.669, + "eval_rougeL_for_task200_multinli_textual_entailment": 4.7158, + "eval_rougeL_for_task201_multinli_textual_entailment": 18.9188, + "eval_rougeL_for_task202_multinli_textual_entailment": 10.6517, + "eval_rougeL_for_task219_rocstories_title_generation": 9.7741, + "eval_rougeL_for_task220_rocstories_title_generation": 27.6468, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 18.1747, + "eval_rougeL_for_task232_iirc_answerability_classification": 9.9888, + "eval_rougeL_for_task233_iirc_answerability_classification": 9.9742, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 11.0912, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 39.6446, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 23.0381, + "eval_rougeL_for_task288_gigaword_title_generation": 17.2386, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 1.0409, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.093, + "eval_rougeL_for_task329_gap_coreference_resolution": 15.4782, + "eval_rougeL_for_task330_gap_coreference_resolution": 1.2683, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 14.1005, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 7.2105, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 0.2, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 0.6505, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.4678, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.3361, + "eval_rougeL_for_task402_grailqa_question_rewriting": 31.0104, + "eval_rougeL_for_task418_persent_title_generation": 8.6234, + "eval_rougeL_for_task442_com_qa_question_rewriting": 49.6319, + "eval_rougeL_for_task500_scruples_title_generation": 7.4049, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 15.1658, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 42.2822, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 28.9445, + "eval_rougeL_for_task602_wikitext_title_generation": 8.4901, + "eval_rougeL_for_task613_liar_keyword_tagging": 6.1845, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.6766, + "eval_rougeL_for_task619_ohsumed_title_generation": 21.6387, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 19.7811, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 9.9764, + "eval_rougeL_for_task640_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 22.4649, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 18.5827, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.1589, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.2789, + "eval_rougeL_for_task677_ollie_data_to_text": 32.1038, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 14.5742, + "eval_rougeL_for_task743_eurlex_title_generation": 16.3932, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 8.0871, + "eval_rougeL_for_task769_qed_title_generation": 7.0547, + "eval_rougeL_for_task827_copa_cause_effect_classification": 29.1087, + "eval_rougeL_for_task828_copa_cause_effect_classification": 0.4762, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.3096, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 9.5214, + "eval_rougeL_for_task890_gwsd_textual_entailment": 2.1667, + "eval_rougeL_for_task891_gap_coreference_resolution": 3.7114, + "eval_rougeL_for_task892_gap_coreference_resolution": 2.7914, + "eval_rougeL_for_task893_gap_coreference_resolution": 2.1142, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 12.0667, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 8.5513, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 9.5944, + "eval_rougeL_for_task957_e2e_data_to_text": 22.1653, + "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, + "eval_rougeL_for_textual_entailment": 10.6527, + "eval_rougeL_for_title_generation": 15.4655, + "eval_rougeL_for_word_analogy": 8.4958, + "eval_runtime": 3484.303, + "eval_samples_per_second": 3.418, + "eval_steps_per_second": 0.214, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 2.1103, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 23.7615, + "eval_exact_match_for_answerability_classification": 37.7692, + "eval_exact_match_for_cause_effect_classification": 34.8571, + "eval_exact_match_for_coreference_resolution": 29.2143, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 38.4286, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 35.8, + "eval_exact_match_for_overlap_extraction": 6.5, + "eval_exact_match_for_question_rewriting": 0.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 46.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 27.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 13.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 56.0, + "eval_exact_match_for_task1156_bard_word_analogy": 15.0, + "eval_exact_match_for_task1157_bard_word_analogy": 4.0, + "eval_exact_match_for_task1158_bard_word_analogy": 7.0, + "eval_exact_match_for_task1159_bard_word_analogy": 6.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 44.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 69.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 33.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 39.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 70.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 49.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 20.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 46.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 8.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 10.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 47.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 21.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 64.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.4583, + "eval_exact_match_for_title_generation": 10.2018, + "eval_exact_match_for_word_analogy": 12.125, + "eval_f1": 36.1556, + "eval_f1_for_answerability_classification": 40.5897, + "eval_f1_for_cause_effect_classification": 50.9542, + "eval_f1_for_coreference_resolution": 36.0242, + "eval_f1_for_data_to_text": 30.6715, + "eval_f1_for_dialogue_act_recognition": 42.0571, + "eval_f1_for_grammar_error_correction": 52.7911, + "eval_f1_for_keyword_tagging": 51.2365, + "eval_f1_for_overlap_extraction": 8.4666, + "eval_f1_for_question_rewriting": 42.2806, + "eval_f1_for_task020_mctaco_answerability_classification": 46.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 27.1333, + "eval_f1_for_task034_winogrande_question_rewriting": 9.6851, + "eval_f1_for_task035_winogrande_question_rewriting": 10.6745, + "eval_f1_for_task036_qasc_keyword_tagging": 68.041, + "eval_f1_for_task039_qasc_overlap_extraction": 14.5833, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 9.8127, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 56.0, + "eval_f1_for_task1156_bard_word_analogy": 15.0, + "eval_f1_for_task1157_bard_word_analogy": 4.0, + "eval_f1_for_task1158_bard_word_analogy": 9.3333, + "eval_f1_for_task1159_bard_word_analogy": 6.0, + "eval_f1_for_task1161_coda_19_title_generation": 25.7643, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 44.0826, + "eval_f1_for_task121_atomic_question_rewriting": 49.2206, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.0619, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.2382, + "eval_f1_for_task1356_xlsum_title_generation": 12.2401, + "eval_f1_for_task1358_xlsum_title_generation": 28.766, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 30.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 44.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 69.0, + "eval_f1_for_task1407_dart_data_to_text": 32.9154, + "eval_f1_for_task1409_dart_data_to_text": 46.2738, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 23.2955, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 14.9303, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 82.2867, + "eval_f1_for_task1562_zest_question_rewriting": 56.1414, + "eval_f1_for_task1586_scifact_title_generation": 21.6055, + "eval_f1_for_task1598_nyc_data_to_text": 25.717, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 13.3293, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 33.4369, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 48.6496, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 23.7514, + "eval_f1_for_task1728_web_nlg_data_to_text": 34.5281, + "eval_f1_for_task190_snli_textual_entailment": 39.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 30.0, + "eval_f1_for_task219_rocstories_title_generation": 16.1552, + "eval_f1_for_task220_rocstories_title_generation": 70.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.6667, + "eval_f1_for_task232_iirc_answerability_classification": 0.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 22.1667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 2.35, + "eval_f1_for_task288_gigaword_title_generation": 25.2119, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 27.4, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 56.0111, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 26.9248, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 8.0465, + "eval_f1_for_task402_grailqa_question_rewriting": 68.1249, + "eval_f1_for_task418_persent_title_generation": 7.2147, + "eval_f1_for_task442_com_qa_question_rewriting": 47.6747, + "eval_f1_for_task500_scruples_title_generation": 11.1157, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.1929, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.3432, + "eval_f1_for_task602_wikitext_title_generation": 13.1711, + "eval_f1_for_task613_liar_keyword_tagging": 11.8333, + "eval_f1_for_task614_glucose_cause_effect_classification": 19.0881, + "eval_f1_for_task619_ohsumed_title_generation": 31.8839, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.127, + "eval_f1_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 35.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 68.181, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 54.4794, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.7742, + "eval_f1_for_task671_ambigqa_question_rewriting": 57.1412, + "eval_f1_for_task677_ollie_data_to_text": 26.7428, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 19.1615, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.6639, + "eval_f1_for_task769_qed_title_generation": 70.2199, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.4, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 39.3508, + "eval_f1_for_task892_gap_coreference_resolution": 29.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 41.9272, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.4583, + "eval_f1_for_title_generation": 28.2276, + "eval_f1_for_word_analogy": 12.4167, + "eval_gen_len": 11.7875, + "eval_global_step": 50, + "eval_loss": 1.385162353515625, + "eval_rouge1": 38.7196, + "eval_rouge1_for_answerability_classification": 40.5897, + "eval_rouge1_for_cause_effect_classification": 52.4601, + "eval_rouge1_for_coreference_resolution": 36.4541, + "eval_rouge1_for_data_to_text": 41.2598, + "eval_rouge1_for_dialogue_act_recognition": 44.8524, + "eval_rouge1_for_grammar_error_correction": 56.7897, + "eval_rouge1_for_keyword_tagging": 56.4091, + "eval_rouge1_for_overlap_extraction": 10.9423, + "eval_rouge1_for_question_rewriting": 43.8494, + "eval_rouge1_for_task020_mctaco_answerability_classification": 46.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 29.1333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 10.0093, + "eval_rouge1_for_task035_winogrande_question_rewriting": 11.2544, + "eval_rouge1_for_task036_qasc_keyword_tagging": 75.2077, + "eval_rouge1_for_task039_qasc_overlap_extraction": 19.5833, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.6083, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 56.0, + "eval_rouge1_for_task1156_bard_word_analogy": 15.0, + "eval_rouge1_for_task1157_bard_word_analogy": 4.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.3333, + "eval_rouge1_for_task1159_bard_word_analogy": 6.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 28.8869, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 44.9466, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.3209, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.5791, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.9043, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.6064, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.7729, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 44.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.9, + "eval_rouge1_for_task1407_dart_data_to_text": 39.8148, + "eval_rouge1_for_task1409_dart_data_to_text": 51.5669, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.1115, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 16.6314, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 85.4678, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.0686, + "eval_rouge1_for_task1586_scifact_title_generation": 24.7728, + "eval_rouge1_for_task1598_nyc_data_to_text": 25.4677, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 13.4143, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 33.7302, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 49.9712, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 23.7514, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.1769, + "eval_rouge1_for_task190_snli_textual_entailment": 39.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.44, + "eval_rouge1_for_task220_rocstories_title_generation": 70.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.6667, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 22.3333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 2.3012, + "eval_rouge1_for_task288_gigaword_title_generation": 28.0028, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 27.4, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 55.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 27.7863, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 10.0426, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.6783, + "eval_rouge1_for_task418_persent_title_generation": 8.4218, + "eval_rouge1_for_task442_com_qa_question_rewriting": 50.4371, + "eval_rouge1_for_task500_scruples_title_generation": 12.2154, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 35.927, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.0103, + "eval_rouge1_for_task602_wikitext_title_generation": 14.0904, + "eval_rouge1_for_task613_liar_keyword_tagging": 21.9, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.768, + "eval_rouge1_for_task619_ohsumed_title_generation": 34.1707, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.8853, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 70.0524, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 56.4794, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 70.6674, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 57.6426, + "eval_rouge1_for_task677_ollie_data_to_text": 31.4932, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 20.9466, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.3679, + "eval_rouge1_for_task769_qed_title_generation": 70.4483, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.4, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.2503, + "eval_rouge1_for_task892_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 41.7724, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.4028, + "eval_rouge1_for_title_generation": 29.9686, + "eval_rouge1_for_word_analogy": 12.4167, + "eval_rougeL": 37.5044, + "eval_rougeL_for_answerability_classification": 40.5897, + "eval_rougeL_for_cause_effect_classification": 52.0169, + "eval_rougeL_for_coreference_resolution": 36.4382, + "eval_rougeL_for_data_to_text": 34.4357, + "eval_rougeL_for_dialogue_act_recognition": 44.8524, + "eval_rougeL_for_grammar_error_correction": 55.7493, + "eval_rougeL_for_keyword_tagging": 56.1424, + "eval_rougeL_for_overlap_extraction": 10.9423, + "eval_rougeL_for_question_rewriting": 40.4111, + "eval_rougeL_for_task020_mctaco_answerability_classification": 46.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 29.1333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 10.0093, + "eval_rougeL_for_task035_winogrande_question_rewriting": 11.2544, + "eval_rougeL_for_task036_qasc_keyword_tagging": 75.2077, + "eval_rougeL_for_task039_qasc_overlap_extraction": 19.5833, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 50.5058, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 56.0, + "eval_rougeL_for_task1156_bard_word_analogy": 15.0, + "eval_rougeL_for_task1157_bard_word_analogy": 4.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.3333, + "eval_rougeL_for_task1159_bard_word_analogy": 6.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 22.9995, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 44.3019, + "eval_rougeL_for_task121_atomic_question_rewriting": 48.043, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.9297, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5489, + "eval_rougeL_for_task1356_xlsum_title_generation": 11.431, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.4202, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 44.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.9, + "eval_rougeL_for_task1407_dart_data_to_text": 29.918, + "eval_rougeL_for_task1409_dart_data_to_text": 40.8016, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.0865, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 14.2134, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 84.4121, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.3748, + "eval_rougeL_for_task1586_scifact_title_generation": 19.5452, + "eval_rougeL_for_task1598_nyc_data_to_text": 23.3358, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 13.1476, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 31.8366, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 46.6025, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 23.5292, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.6203, + "eval_rougeL_for_task190_snli_textual_entailment": 39.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.9803, + "eval_rougeL_for_task220_rocstories_title_generation": 70.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.6667, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 22.3333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 2.3012, + "eval_rougeL_for_task288_gigaword_title_generation": 23.3933, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 27.4, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 55.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 25.9921, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 10.0426, + "eval_rougeL_for_task402_grailqa_question_rewriting": 56.3844, + "eval_rougeL_for_task418_persent_title_generation": 6.9055, + "eval_rougeL_for_task442_com_qa_question_rewriting": 45.9636, + "eval_rougeL_for_task500_scruples_title_generation": 10.5206, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 35.7918, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.3074, + "eval_rougeL_for_task602_wikitext_title_generation": 14.0904, + "eval_rougeL_for_task613_liar_keyword_tagging": 21.9, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.4592, + "eval_rougeL_for_task619_ohsumed_title_generation": 28.9869, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.5519, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 70.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 70.0524, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 56.4794, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 68.7016, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 55.7923, + "eval_rougeL_for_task677_ollie_data_to_text": 26.7731, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.5201, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.6848, + "eval_rougeL_for_task769_qed_title_generation": 70.4483, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.4, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.2503, + "eval_rougeL_for_task892_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.9097, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.4028, + "eval_rougeL_for_title_generation": 27.513, + "eval_rougeL_for_word_analogy": 12.4167, + "eval_runtime": 1195.7675, + "eval_samples_per_second": 9.96, + "eval_steps_per_second": 0.623, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 1.4669, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 24.047, + "eval_exact_match_for_answerability_classification": 39.4615, + "eval_exact_match_for_cause_effect_classification": 37.0, + "eval_exact_match_for_coreference_resolution": 31.2857, + "eval_exact_match_for_data_to_text": 0.1211, + "eval_exact_match_for_dialogue_act_recognition": 40.1429, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 27.4, + "eval_exact_match_for_overlap_extraction": 2.5, + "eval_exact_match_for_question_rewriting": 1.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 29.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 5.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 54.0, + "eval_exact_match_for_task1156_bard_word_analogy": 16.0, + "eval_exact_match_for_task1157_bard_word_analogy": 5.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 6.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 49.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 27.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 56.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 39.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 74.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 40.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 52.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 11.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 4.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 46.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 45.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 39.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.0, + "eval_exact_match_for_title_generation": 9.2489, + "eval_exact_match_for_word_analogy": 12.0, + "eval_f1": 37.6255, + "eval_f1_for_answerability_classification": 41.9231, + "eval_f1_for_cause_effect_classification": 50.8176, + "eval_f1_for_coreference_resolution": 37.8539, + "eval_f1_for_data_to_text": 32.3999, + "eval_f1_for_dialogue_act_recognition": 43.6429, + "eval_f1_for_grammar_error_correction": 49.7429, + "eval_f1_for_keyword_tagging": 42.2397, + "eval_f1_for_overlap_extraction": 13.3189, + "eval_f1_for_question_rewriting": 51.8954, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 29.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 8.5954, + "eval_f1_for_task035_winogrande_question_rewriting": 9.3433, + "eval_f1_for_task036_qasc_keyword_tagging": 61.2205, + "eval_f1_for_task039_qasc_overlap_extraction": 13.866, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 24.3643, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 54.0, + "eval_f1_for_task1156_bard_word_analogy": 16.6667, + "eval_f1_for_task1157_bard_word_analogy": 5.6667, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 6.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.1702, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 58.7101, + "eval_f1_for_task121_atomic_question_rewriting": 52.0596, + "eval_f1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.8483, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.613, + "eval_f1_for_task1356_xlsum_title_generation": 13.3831, + "eval_f1_for_task1358_xlsum_title_generation": 30.4755, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 27.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 30.1222, + "eval_f1_for_task1409_dart_data_to_text": 43.3209, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 15.9265, + "eval_f1_for_task1439_doqa_answerability_classification": 57.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 21.2866, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5593, + "eval_f1_for_task1562_zest_question_rewriting": 56.5846, + "eval_f1_for_task1586_scifact_title_generation": 26.1933, + "eval_f1_for_task1598_nyc_data_to_text": 24.5181, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 56.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 76.9206, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 30.6119, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 53.0645, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 25.9894, + "eval_f1_for_task1728_web_nlg_data_to_text": 42.1458, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 50.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 15.8593, + "eval_f1_for_task220_rocstories_title_generation": 74.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 0.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 52.4833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 12.7718, + "eval_f1_for_task288_gigaword_title_generation": 25.7616, + "eval_f1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 14.2333, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 58.2968, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.5077, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 11.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 73.8546, + "eval_f1_for_task418_persent_title_generation": 18.5859, + "eval_f1_for_task442_com_qa_question_rewriting": 64.8466, + "eval_f1_for_task500_scruples_title_generation": 11.3334, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.6377, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 41.3592, + "eval_f1_for_task602_wikitext_title_generation": 15.9763, + "eval_f1_for_task613_liar_keyword_tagging": 10.4048, + "eval_f1_for_task614_glucose_cause_effect_classification": 5.5486, + "eval_f1_for_task619_ohsumed_title_generation": 34.8709, + "eval_f1_for_task620_ohsumed_keyword_tagging": 31.1159, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 46.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 58.4571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 54.866, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.6238, + "eval_f1_for_task671_ambigqa_question_rewriting": 59.6973, + "eval_f1_for_task677_ollie_data_to_text": 26.7328, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 22.0462, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.7476, + "eval_f1_for_task769_qed_title_generation": 56.5998, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 40.0, + "eval_f1_for_task890_gwsd_textual_entailment": 44.0, + "eval_f1_for_task891_gap_coreference_resolution": 36.7524, + "eval_f1_for_task892_gap_coreference_resolution": 29.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 44.5731, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.0, + "eval_f1_for_title_generation": 30.039, + "eval_f1_for_word_analogy": 12.1667, + "eval_gen_len": 10.0924, + "eval_global_step": 100, + "eval_loss": 1.3632110357284546, + "eval_rouge1": 39.5558, + "eval_rouge1_for_answerability_classification": 41.9231, + "eval_rouge1_for_cause_effect_classification": 50.9642, + "eval_rouge1_for_coreference_resolution": 38.1947, + "eval_rouge1_for_data_to_text": 38.3043, + "eval_rouge1_for_dialogue_act_recognition": 45.1095, + "eval_rouge1_for_grammar_error_correction": 51.8703, + "eval_rouge1_for_keyword_tagging": 46.7359, + "eval_rouge1_for_overlap_extraction": 14.6826, + "eval_rouge1_for_question_rewriting": 53.4583, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 30.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 8.6946, + "eval_rouge1_for_task035_winogrande_question_rewriting": 9.2851, + "eval_rouge1_for_task036_qasc_keyword_tagging": 68.788, + "eval_rouge1_for_task039_qasc_overlap_extraction": 16.721, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 62.706, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 54.0, + "eval_rouge1_for_task1156_bard_word_analogy": 16.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 5.6667, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 6.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.8189, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 59.5847, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.1411, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.3676, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.256, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.7188, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.8641, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 27.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 65.6, + "eval_rouge1_for_task1407_dart_data_to_text": 30.9106, + "eval_rouge1_for_task1409_dart_data_to_text": 45.6077, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 16.8513, + "eval_rouge1_for_task1439_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 23.758, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8893, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.4893, + "eval_rouge1_for_task1586_scifact_title_generation": 30.5806, + "eval_rouge1_for_task1598_nyc_data_to_text": 24.6694, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 85.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.7694, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 30.9409, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 54.7231, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 25.9894, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 44.7341, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.0411, + "eval_rouge1_for_task220_rocstories_title_generation": 74.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 53.15, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 12.6441, + "eval_rouge1_for_task288_gigaword_title_generation": 28.7444, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 15.5667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 58.2524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.5819, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 13.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.9744, + "eval_rouge1_for_task418_persent_title_generation": 20.9238, + "eval_rouge1_for_task442_com_qa_question_rewriting": 68.0886, + "eval_rouge1_for_task500_scruples_title_generation": 12.2516, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.4997, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.2109, + "eval_rouge1_for_task602_wikitext_title_generation": 17.064, + "eval_rouge1_for_task613_liar_keyword_tagging": 17.8883, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 5.5009, + "eval_rouge1_for_task619_ohsumed_title_generation": 38.5195, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 37.546, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 46.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 59.4571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 54.8957, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.364, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 60.3935, + "eval_rouge1_for_task677_ollie_data_to_text": 31.0382, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 24.235, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.1112, + "eval_rouge1_for_task769_qed_title_generation": 57.3791, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 36.5381, + "eval_rouge1_for_task892_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 44.458, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.3056, + "eval_rouge1_for_title_generation": 32.2293, + "eval_rouge1_for_word_analogy": 12.1667, + "eval_rougeL": 38.305, + "eval_rougeL_for_answerability_classification": 41.9231, + "eval_rougeL_for_cause_effect_classification": 50.7757, + "eval_rougeL_for_coreference_resolution": 38.1474, + "eval_rougeL_for_data_to_text": 32.6706, + "eval_rougeL_for_dialogue_act_recognition": 45.1095, + "eval_rougeL_for_grammar_error_correction": 50.6499, + "eval_rougeL_for_keyword_tagging": 46.0426, + "eval_rougeL_for_overlap_extraction": 14.6826, + "eval_rougeL_for_question_rewriting": 49.6557, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 30.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 8.6946, + "eval_rougeL_for_task035_winogrande_question_rewriting": 9.1209, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.9733, + "eval_rougeL_for_task039_qasc_overlap_extraction": 16.721, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 50.7297, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 54.0, + "eval_rougeL_for_task1156_bard_word_analogy": 16.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 5.6667, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 6.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.7553, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 58.4496, + "eval_rougeL_for_task121_atomic_question_rewriting": 51.3098, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.5434, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.2826, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.2508, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.5601, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 27.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 65.6, + "eval_rougeL_for_task1407_dart_data_to_text": 26.0282, + "eval_rougeL_for_task1409_dart_data_to_text": 39.0873, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 15.4856, + "eval_rougeL_for_task1439_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 19.9558, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.8143, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.6438, + "eval_rougeL_for_task1586_scifact_title_generation": 24.1665, + "eval_rougeL_for_task1598_nyc_data_to_text": 21.8282, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 85.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 74.4764, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 29.9505, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 51.1849, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 25.3276, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 37.6485, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.7754, + "eval_rougeL_for_task220_rocstories_title_generation": 74.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 53.15, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 12.6441, + "eval_rougeL_for_task288_gigaword_title_generation": 25.229, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 15.5667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 58.2524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.4334, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 13.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 60.27, + "eval_rougeL_for_task418_persent_title_generation": 17.8621, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.7673, + "eval_rougeL_for_task500_scruples_title_generation": 10.9204, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.3287, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.3136, + "eval_rougeL_for_task602_wikitext_title_generation": 17.064, + "eval_rougeL_for_task613_liar_keyword_tagging": 17.8883, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 5.3298, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.7984, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 35.8942, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 46.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 59.4571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 54.8957, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.6241, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.5731, + "eval_rougeL_for_task677_ollie_data_to_text": 26.4272, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 19.1153, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.2553, + "eval_rougeL_for_task769_qed_title_generation": 57.1593, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 44.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 36.5381, + "eval_rougeL_for_task892_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.0532, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.3056, + "eval_rougeL_for_title_generation": 29.2743, + "eval_rougeL_for_word_analogy": 12.1667, + "eval_runtime": 1123.6113, + "eval_samples_per_second": 10.6, + "eval_steps_per_second": 0.663, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 1.4075, + "step": 200 + }, + { + "epoch": 0.04, + "eval_exact_match": 24.6516, + "eval_exact_match_for_answerability_classification": 40.6923, + "eval_exact_match_for_cause_effect_classification": 40.0, + "eval_exact_match_for_coreference_resolution": 29.2143, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 42.1429, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 30.0, + "eval_exact_match_for_overlap_extraction": 6.5, + "eval_exact_match_for_question_rewriting": 1.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 37.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 13.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 58.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 60.0, + "eval_exact_match_for_task1156_bard_word_analogy": 26.0, + "eval_exact_match_for_task1157_bard_word_analogy": 10.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 25.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 22.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 39.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 5.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 57.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 23.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 13.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 73.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 47.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 12.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 10.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 42.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 19.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 70.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 63.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 55.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 19.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.0, + "eval_exact_match_for_title_generation": 7.7915, + "eval_exact_match_for_word_analogy": 15.625, + "eval_f1": 37.7612, + "eval_f1_for_answerability_classification": 43.3077, + "eval_f1_for_cause_effect_classification": 54.1468, + "eval_f1_for_coreference_resolution": 35.8544, + "eval_f1_for_data_to_text": 31.7615, + "eval_f1_for_dialogue_act_recognition": 45.7143, + "eval_f1_for_grammar_error_correction": 54.5781, + "eval_f1_for_keyword_tagging": 45.2695, + "eval_f1_for_overlap_extraction": 13.7831, + "eval_f1_for_question_rewriting": 45.6854, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.0, + "eval_f1_for_task034_winogrande_question_rewriting": 8.8213, + "eval_f1_for_task035_winogrande_question_rewriting": 8.7534, + "eval_f1_for_task036_qasc_keyword_tagging": 65.6, + "eval_f1_for_task039_qasc_overlap_extraction": 15.0, + "eval_f1_for_task050_multirc_answerability_classification": 58.0, + "eval_f1_for_task102_commongen_data_to_text": 33.4222, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 60.0, + "eval_f1_for_task1156_bard_word_analogy": 26.0, + "eval_f1_for_task1157_bard_word_analogy": 10.0, + "eval_f1_for_task1158_bard_word_analogy": 15.0, + "eval_f1_for_task1159_bard_word_analogy": 5.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.6669, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 37.0804, + "eval_f1_for_task121_atomic_question_rewriting": 51.8554, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.9494, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.9947, + "eval_f1_for_task1356_xlsum_title_generation": 15.5097, + "eval_f1_for_task1358_xlsum_title_generation": 28.8328, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 25.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 29.3402, + "eval_f1_for_task1409_dart_data_to_text": 44.2841, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 25.8486, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 22.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 24.169, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3076, + "eval_f1_for_task1562_zest_question_rewriting": 55.8721, + "eval_f1_for_task1586_scifact_title_generation": 26.2808, + "eval_f1_for_task1598_nyc_data_to_text": 14.0597, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 45.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 69.8687, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 31.0207, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 53.6864, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 29.881, + "eval_f1_for_task1728_web_nlg_data_to_text": 42.5679, + "eval_f1_for_task190_snli_textual_entailment": 5.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 57.0, + "eval_f1_for_task201_multinli_textual_entailment": 32.0, + "eval_f1_for_task202_multinli_textual_entailment": 23.0, + "eval_f1_for_task219_rocstories_title_generation": 20.5891, + "eval_f1_for_task220_rocstories_title_generation": 13.4, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_f1_for_task232_iirc_answerability_classification": 0.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 73.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.55, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 12.5661, + "eval_f1_for_task288_gigaword_title_generation": 25.5877, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 24.0, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 52.0111, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.2041, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 12.0, + "eval_f1_for_task402_grailqa_question_rewriting": 79.4216, + "eval_f1_for_task418_persent_title_generation": 22.8551, + "eval_f1_for_task442_com_qa_question_rewriting": 31.98, + "eval_f1_for_task500_scruples_title_generation": 12.3109, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.1081, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 41.924, + "eval_f1_for_task602_wikitext_title_generation": 15.272, + "eval_f1_for_task613_liar_keyword_tagging": 14.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 8.4902, + "eval_f1_for_task619_ohsumed_title_generation": 35.0373, + "eval_f1_for_task620_ohsumed_keyword_tagging": 31.7857, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 32.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 64.2952, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 35.9333, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.1113, + "eval_f1_for_task671_ambigqa_question_rewriting": 52.7803, + "eval_f1_for_task677_ollie_data_to_text": 27.5478, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 24.1573, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.5021, + "eval_f1_for_task769_qed_title_generation": 83.5207, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 63.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_f1_for_task890_gwsd_textual_entailment": 55.0, + "eval_f1_for_task891_gap_coreference_resolution": 39.5857, + "eval_f1_for_task892_gap_coreference_resolution": 19.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 38.4172, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.0, + "eval_f1_for_title_generation": 29.1712, + "eval_f1_for_word_analogy": 15.625, + "eval_gen_len": 9.346, + "eval_global_step": 200, + "eval_loss": 1.3649206161499023, + "eval_rouge1": 39.9967, + "eval_rouge1_for_answerability_classification": 43.3077, + "eval_rouge1_for_cause_effect_classification": 54.4089, + "eval_rouge1_for_coreference_resolution": 36.1917, + "eval_rouge1_for_data_to_text": 36.4886, + "eval_rouge1_for_dialogue_act_recognition": 49.1714, + "eval_rouge1_for_grammar_error_correction": 56.5706, + "eval_rouge1_for_keyword_tagging": 49.6762, + "eval_rouge1_for_overlap_extraction": 15.3899, + "eval_rouge1_for_question_rewriting": 47.352, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 8.9086, + "eval_rouge1_for_task035_winogrande_question_rewriting": 9.0488, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.4, + "eval_rouge1_for_task039_qasc_overlap_extraction": 18.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 58.0, + "eval_rouge1_for_task102_commongen_data_to_text": 60.9085, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 60.0, + "eval_rouge1_for_task1156_bard_word_analogy": 26.0, + "eval_rouge1_for_task1157_bard_word_analogy": 10.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.0, + "eval_rouge1_for_task1159_bard_word_analogy": 5.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 32.1905, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 38.7526, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.5689, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.5534, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.9188, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.8983, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.9318, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 25.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.2, + "eval_rouge1_for_task1407_dart_data_to_text": 30.1438, + "eval_rouge1_for_task1409_dart_data_to_text": 45.0142, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 26.7344, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 26.2638, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.4069, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.6998, + "eval_rouge1_for_task1586_scifact_title_generation": 29.9471, + "eval_rouge1_for_task1598_nyc_data_to_text": 14.38, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 70.5606, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 31.4084, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 55.4053, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 29.881, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 48.0012, + "eval_rouge1_for_task190_snli_textual_entailment": 5.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 57.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 23.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.2166, + "eval_rouge1_for_task220_rocstories_title_generation": 13.4, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 73.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 63.15, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 12.7797, + "eval_rouge1_for_task288_gigaword_title_generation": 28.6416, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 25.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 51.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.9072, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 14.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.6499, + "eval_rouge1_for_task418_persent_title_generation": 25.5562, + "eval_rouge1_for_task442_com_qa_question_rewriting": 34.5636, + "eval_rouge1_for_task500_scruples_title_generation": 13.3575, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 35.7996, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.5324, + "eval_rouge1_for_task602_wikitext_title_generation": 15.9426, + "eval_rouge1_for_task613_liar_keyword_tagging": 23.0667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 9.6215, + "eval_rouge1_for_task619_ohsumed_title_generation": 38.3551, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 38.8524, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 65.0619, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 35.7667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 70.6374, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 53.5625, + "eval_rouge1_for_task677_ollie_data_to_text": 31.2835, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 77.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.2497, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.8653, + "eval_rouge1_for_task769_qed_title_generation": 83.9393, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 63.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 55.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 39.5857, + "eval_rouge1_for_task892_gap_coreference_resolution": 19.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 38.4709, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 44.7361, + "eval_rouge1_for_title_generation": 31.1452, + "eval_rouge1_for_word_analogy": 15.625, + "eval_rougeL": 38.8027, + "eval_rougeL_for_answerability_classification": 43.3077, + "eval_rougeL_for_cause_effect_classification": 54.1175, + "eval_rougeL_for_coreference_resolution": 36.1917, + "eval_rougeL_for_data_to_text": 31.5156, + "eval_rougeL_for_dialogue_act_recognition": 49.1714, + "eval_rougeL_for_grammar_error_correction": 55.3172, + "eval_rougeL_for_keyword_tagging": 49.4295, + "eval_rougeL_for_overlap_extraction": 15.3422, + "eval_rougeL_for_question_rewriting": 43.5142, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 8.9086, + "eval_rougeL_for_task035_winogrande_question_rewriting": 8.9197, + "eval_rougeL_for_task036_qasc_keyword_tagging": 71.4, + "eval_rougeL_for_task039_qasc_overlap_extraction": 18.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 58.0, + "eval_rougeL_for_task102_commongen_data_to_text": 51.5224, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 60.0, + "eval_rougeL_for_task1156_bard_word_analogy": 26.0, + "eval_rougeL_for_task1157_bard_word_analogy": 10.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.0, + "eval_rougeL_for_task1159_bard_word_analogy": 5.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 26.0117, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 38.0172, + "eval_rougeL_for_task121_atomic_question_rewriting": 50.6845, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.7751, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.0322, + "eval_rougeL_for_task1356_xlsum_title_generation": 15.0988, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.7236, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 25.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.2, + "eval_rougeL_for_task1407_dart_data_to_text": 25.3158, + "eval_rougeL_for_task1409_dart_data_to_text": 38.4149, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 25.17, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 60.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 22.7048, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.4644, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.6582, + "eval_rougeL_for_task1586_scifact_title_generation": 24.466, + "eval_rougeL_for_task1598_nyc_data_to_text": 12.7756, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 67.4471, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 30.8084, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 51.9061, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 29.881, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 41.2159, + "eval_rougeL_for_task190_snli_textual_entailment": 5.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 57.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 23.0, + "eval_rougeL_for_task219_rocstories_title_generation": 22.7642, + "eval_rougeL_for_task220_rocstories_title_generation": 13.4, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 46.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 73.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 63.15, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 12.6845, + "eval_rougeL_for_task288_gigaword_title_generation": 24.9951, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 25.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 51.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.1804, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 14.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.7162, + "eval_rougeL_for_task418_persent_title_generation": 21.9832, + "eval_rougeL_for_task442_com_qa_question_rewriting": 30.2258, + "eval_rougeL_for_task500_scruples_title_generation": 11.9905, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 35.7622, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.2651, + "eval_rougeL_for_task602_wikitext_title_generation": 15.9426, + "eval_rougeL_for_task613_liar_keyword_tagging": 23.0667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 9.3085, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.1366, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.619, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 65.0619, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 35.7667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 68.2903, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 51.7564, + "eval_rougeL_for_task677_ollie_data_to_text": 25.9731, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 77.0, + "eval_rougeL_for_task743_eurlex_title_generation": 20.2787, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.1437, + "eval_rougeL_for_task769_qed_title_generation": 83.9393, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 63.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 55.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 39.5857, + "eval_rougeL_for_task892_gap_coreference_resolution": 19.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 72.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 32.6952, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 44.7361, + "eval_rougeL_for_title_generation": 28.1722, + "eval_rougeL_for_word_analogy": 15.625, + "eval_runtime": 996.7138, + "eval_samples_per_second": 11.949, + "eval_steps_per_second": 0.747, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.2724, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 24.8615, + "eval_exact_match_for_answerability_classification": 42.2308, + "eval_exact_match_for_cause_effect_classification": 37.1429, + "eval_exact_match_for_coreference_resolution": 32.7143, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 37.8571, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 27.4, + "eval_exact_match_for_overlap_extraction": 4.0, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 42.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 40.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 30.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 8.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 64.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 1.0, + "eval_exact_match_for_task1153_bard_word_analogy": 1.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 35.0, + "eval_exact_match_for_task1156_bard_word_analogy": 15.0, + "eval_exact_match_for_task1157_bard_word_analogy": 11.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 6.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 63.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 60.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 66.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 39.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 56.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 79.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 47.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 25.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 30.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 54.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 59.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 15.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 23.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 41.7083, + "eval_exact_match_for_title_generation": 9.0247, + "eval_exact_match_for_word_analogy": 11.375, + "eval_f1": 39.0909, + "eval_f1_for_answerability_classification": 44.9487, + "eval_f1_for_cause_effect_classification": 53.1313, + "eval_f1_for_coreference_resolution": 39.3264, + "eval_f1_for_data_to_text": 36.8047, + "eval_f1_for_dialogue_act_recognition": 41.4286, + "eval_f1_for_grammar_error_correction": 52.8416, + "eval_f1_for_keyword_tagging": 43.2543, + "eval_f1_for_overlap_extraction": 8.6818, + "eval_f1_for_question_rewriting": 48.5015, + "eval_f1_for_task020_mctaco_answerability_classification": 42.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 20.7269, + "eval_f1_for_task035_winogrande_question_rewriting": 16.9777, + "eval_f1_for_task036_qasc_keyword_tagging": 61.8, + "eval_f1_for_task039_qasc_overlap_extraction": 8.6667, + "eval_f1_for_task050_multirc_answerability_classification": 64.0, + "eval_f1_for_task102_commongen_data_to_text": 46.5655, + "eval_f1_for_task1152_bard_word_analogy": 1.0, + "eval_f1_for_task1153_bard_word_analogy": 1.0, + "eval_f1_for_task1154_bard_word_analogy": 9.6667, + "eval_f1_for_task1155_bard_word_analogy": 40.0, + "eval_f1_for_task1156_bard_word_analogy": 16.3333, + "eval_f1_for_task1157_bard_word_analogy": 11.0, + "eval_f1_for_task1158_bard_word_analogy": 15.6667, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 35.426, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 48.1937, + "eval_f1_for_task121_atomic_question_rewriting": 49.3731, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.586, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.7528, + "eval_f1_for_task1356_xlsum_title_generation": 19.769, + "eval_f1_for_task1358_xlsum_title_generation": 29.6041, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 68.0, + "eval_f1_for_task1407_dart_data_to_text": 33.2785, + "eval_f1_for_task1409_dart_data_to_text": 47.5441, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 21.8344, + "eval_f1_for_task1439_doqa_answerability_classification": 52.0, + "eval_f1_for_task1442_doqa_answerability_classification": 63.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 31.0763, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.8488, + "eval_f1_for_task1562_zest_question_rewriting": 54.5262, + "eval_f1_for_task1586_scifact_title_generation": 33.3206, + "eval_f1_for_task1598_nyc_data_to_text": 16.3572, + "eval_f1_for_task1612_sick_textual_entailment": 60.0, + "eval_f1_for_task1615_sick_textual_entailment": 66.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 69.7451, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 42.3949, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 52.3474, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 24.2333, + "eval_f1_for_task1728_web_nlg_data_to_text": 45.1358, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 56.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 19.5999, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 0.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 79.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.55, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 8.697, + "eval_f1_for_task288_gigaword_title_generation": 26.5047, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 59.8524, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.6725, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 16.0, + "eval_f1_for_task402_grailqa_question_rewriting": 79.4498, + "eval_f1_for_task418_persent_title_generation": 23.7969, + "eval_f1_for_task442_com_qa_question_rewriting": 35.3566, + "eval_f1_for_task500_scruples_title_generation": 15.1421, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.4568, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 41.155, + "eval_f1_for_task602_wikitext_title_generation": 14.3893, + "eval_f1_for_task613_liar_keyword_tagging": 17.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 13.5801, + "eval_f1_for_task619_ohsumed_title_generation": 39.412, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.6333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 42.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 48.5048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 64.9476, + "eval_f1_for_task670_ambigqa_question_rewriting": 67.4457, + "eval_f1_for_task671_ambigqa_question_rewriting": 53.9688, + "eval_f1_for_task677_ollie_data_to_text": 31.8245, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 26.7675, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.2687, + "eval_f1_for_task769_qed_title_generation": 73.3497, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 59.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_f1_for_task890_gwsd_textual_entailment": 40.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.6524, + "eval_f1_for_task892_gap_coreference_resolution": 15.0, + "eval_f1_for_task893_gap_coreference_resolution": 23.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 39.2762, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 41.7083, + "eval_f1_for_title_generation": 32.4216, + "eval_f1_for_word_analogy": 12.8333, + "eval_gen_len": 8.5111, + "eval_global_step": 500, + "eval_loss": 1.3686068058013916, + "eval_rouge1": 41.1545, + "eval_rouge1_for_answerability_classification": 44.9487, + "eval_rouge1_for_cause_effect_classification": 53.6356, + "eval_rouge1_for_coreference_resolution": 39.9283, + "eval_rouge1_for_data_to_text": 39.9229, + "eval_rouge1_for_dialogue_act_recognition": 44.5408, + "eval_rouge1_for_grammar_error_correction": 55.6494, + "eval_rouge1_for_keyword_tagging": 47.7048, + "eval_rouge1_for_overlap_extraction": 11.1752, + "eval_rouge1_for_question_rewriting": 50.2743, + "eval_rouge1_for_task020_mctaco_answerability_classification": 42.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 21.2785, + "eval_rouge1_for_task035_winogrande_question_rewriting": 17.6187, + "eval_rouge1_for_task036_qasc_keyword_tagging": 65.7333, + "eval_rouge1_for_task039_qasc_overlap_extraction": 13.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 64.0, + "eval_rouge1_for_task102_commongen_data_to_text": 60.7502, + "eval_rouge1_for_task1152_bard_word_analogy": 1.0, + "eval_rouge1_for_task1153_bard_word_analogy": 1.0, + "eval_rouge1_for_task1154_bard_word_analogy": 9.6667, + "eval_rouge1_for_task1155_bard_word_analogy": 40.0, + "eval_rouge1_for_task1156_bard_word_analogy": 17.0, + "eval_rouge1_for_task1157_bard_word_analogy": 11.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.6667, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 39.1303, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 49.7378, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.5587, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.1633, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.3902, + "eval_rouge1_for_task1356_xlsum_title_generation": 22.5227, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.8467, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 75.4524, + "eval_rouge1_for_task1407_dart_data_to_text": 34.0561, + "eval_rouge1_for_task1409_dart_data_to_text": 48.5602, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 24.2766, + "eval_rouge1_for_task1439_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 63.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 34.0378, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0222, + "eval_rouge1_for_task1562_zest_question_rewriting": 57.189, + "eval_rouge1_for_task1586_scifact_title_generation": 37.7054, + "eval_rouge1_for_task1598_nyc_data_to_text": 16.5124, + "eval_rouge1_for_task1612_sick_textual_entailment": 60.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 88.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 71.1381, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 43.1044, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 53.8522, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 24.5667, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 49.4494, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 56.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.948, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 79.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.7667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 9.0171, + "eval_rouge1_for_task288_gigaword_title_generation": 29.3438, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 4.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 59.6302, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.3158, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.5011, + "eval_rouge1_for_task418_persent_title_generation": 27.3542, + "eval_rouge1_for_task442_com_qa_question_rewriting": 38.3883, + "eval_rouge1_for_task500_scruples_title_generation": 16.8196, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.2258, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.0888, + "eval_rouge1_for_task602_wikitext_title_generation": 15.5686, + "eval_rouge1_for_task613_liar_keyword_tagging": 27.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 16.4667, + "eval_rouge1_for_task619_ohsumed_title_generation": 43.6264, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.3333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 50.4571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 68.181, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 68.2443, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 54.9722, + "eval_rouge1_for_task677_ollie_data_to_text": 35.3952, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rouge1_for_task743_eurlex_title_generation": 28.7903, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4844, + "eval_rouge1_for_task769_qed_title_generation": 73.4474, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 59.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 51.8524, + "eval_rouge1_for_task892_gap_coreference_resolution": 15.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 23.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 40.2495, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 45.7361, + "eval_rouge1_for_title_generation": 34.472, + "eval_rouge1_for_word_analogy": 12.9167, + "eval_rougeL": 39.7737, + "eval_rougeL_for_answerability_classification": 44.9487, + "eval_rougeL_for_cause_effect_classification": 53.3051, + "eval_rougeL_for_coreference_resolution": 39.9215, + "eval_rougeL_for_data_to_text": 34.2727, + "eval_rougeL_for_dialogue_act_recognition": 44.5408, + "eval_rougeL_for_grammar_error_correction": 54.4859, + "eval_rougeL_for_keyword_tagging": 47.4914, + "eval_rougeL_for_overlap_extraction": 11.1752, + "eval_rougeL_for_question_rewriting": 45.5487, + "eval_rougeL_for_task020_mctaco_answerability_classification": 42.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 19.9179, + "eval_rougeL_for_task035_winogrande_question_rewriting": 16.9248, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.7333, + "eval_rougeL_for_task039_qasc_overlap_extraction": 13.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 64.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.4448, + "eval_rougeL_for_task1152_bard_word_analogy": 1.0, + "eval_rougeL_for_task1153_bard_word_analogy": 1.0, + "eval_rougeL_for_task1154_bard_word_analogy": 9.6667, + "eval_rougeL_for_task1155_bard_word_analogy": 40.0, + "eval_rougeL_for_task1156_bard_word_analogy": 17.0, + "eval_rougeL_for_task1157_bard_word_analogy": 11.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.6667, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.6447, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 46.5047, + "eval_rougeL_for_task121_atomic_question_rewriting": 47.6518, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.6104, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.5803, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.0569, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.8856, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 52.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 75.4524, + "eval_rougeL_for_task1407_dart_data_to_text": 27.7928, + "eval_rougeL_for_task1409_dart_data_to_text": 41.4278, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 22.8658, + "eval_rougeL_for_task1439_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 63.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.2362, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.106, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.0005, + "eval_rougeL_for_task1586_scifact_title_generation": 30.5897, + "eval_rougeL_for_task1598_nyc_data_to_text": 13.9973, + "eval_rougeL_for_task1612_sick_textual_entailment": 60.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 88.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 67.5228, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 40.1852, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 50.4659, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 24.5667, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 42.7672, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 56.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.5661, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 79.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.7667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 9.0171, + "eval_rougeL_for_task288_gigaword_title_generation": 25.1995, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 4.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 59.6302, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.9984, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.1615, + "eval_rougeL_for_task418_persent_title_generation": 24.0097, + "eval_rougeL_for_task442_com_qa_question_rewriting": 32.1721, + "eval_rougeL_for_task500_scruples_title_generation": 14.5016, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.161, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.6747, + "eval_rougeL_for_task602_wikitext_title_generation": 15.5686, + "eval_rougeL_for_task613_liar_keyword_tagging": 27.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 15.471, + "eval_rougeL_for_task619_ohsumed_title_generation": 35.4324, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.2667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 50.4571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 68.0857, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 65.5101, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 52.0895, + "eval_rougeL_for_task677_ollie_data_to_text": 28.8495, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 74.0, + "eval_rougeL_for_task743_eurlex_title_generation": 22.543, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.3636, + "eval_rougeL_for_task769_qed_title_generation": 73.3297, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 59.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 51.8524, + "eval_rougeL_for_task892_gap_coreference_resolution": 15.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 23.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 33.2332, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 45.7361, + "eval_rougeL_for_title_generation": 31.109, + "eval_rougeL_for_word_analogy": 12.9167, + "eval_runtime": 968.8478, + "eval_samples_per_second": 12.293, + "eval_steps_per_second": 0.769, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 1.1487, + "step": 1000 + }, + { + "epoch": 0.22, + "eval_exact_match": 26.1797, + "eval_exact_match_for_answerability_classification": 43.9231, + "eval_exact_match_for_cause_effect_classification": 38.0, + "eval_exact_match_for_coreference_resolution": 37.8571, + "eval_exact_match_for_data_to_text": 0.7264, + "eval_exact_match_for_dialogue_act_recognition": 38.1429, + "eval_exact_match_for_grammar_error_correction": 8.5, + "eval_exact_match_for_keyword_tagging": 32.6, + "eval_exact_match_for_overlap_extraction": 2.5, + "eval_exact_match_for_question_rewriting": 1.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 47.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 56.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 5.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 55.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 10.0, + "eval_exact_match_for_task1154_bard_word_analogy": 5.0, + "eval_exact_match_for_task1155_bard_word_analogy": 39.0, + "eval_exact_match_for_task1156_bard_word_analogy": 34.0, + "eval_exact_match_for_task1157_bard_word_analogy": 26.0, + "eval_exact_match_for_task1158_bard_word_analogy": 24.0, + "eval_exact_match_for_task1159_bard_word_analogy": 6.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 60.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 44.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 50.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 1.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1659_billsum_title_generation": 38.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 68.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 29.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 86.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 26.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 25.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 56.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 59.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 1.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 53.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 8.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 46.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 46.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 34.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 58.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, + "eval_exact_match_for_textual_entailment": 38.7917, + "eval_exact_match_for_title_generation": 11.0426, + "eval_exact_match_for_word_analogy": 18.375, + "eval_f1": 42.0407, + "eval_f1_for_answerability_classification": 46.5064, + "eval_f1_for_cause_effect_classification": 55.7193, + "eval_f1_for_coreference_resolution": 44.8349, + "eval_f1_for_data_to_text": 40.057, + "eval_f1_for_dialogue_act_recognition": 41.8095, + "eval_f1_for_grammar_error_correction": 59.0366, + "eval_f1_for_keyword_tagging": 49.6933, + "eval_f1_for_overlap_extraction": 13.968, + "eval_f1_for_question_rewriting": 57.5455, + "eval_f1_for_task020_mctaco_answerability_classification": 47.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 58.0, + "eval_f1_for_task034_winogrande_question_rewriting": 36.6576, + "eval_f1_for_task035_winogrande_question_rewriting": 38.1802, + "eval_f1_for_task036_qasc_keyword_tagging": 72.8667, + "eval_f1_for_task039_qasc_overlap_extraction": 6.3333, + "eval_f1_for_task050_multirc_answerability_classification": 55.0, + "eval_f1_for_task102_commongen_data_to_text": 49.2169, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 10.0, + "eval_f1_for_task1154_bard_word_analogy": 9.6667, + "eval_f1_for_task1155_bard_word_analogy": 41.6667, + "eval_f1_for_task1156_bard_word_analogy": 36.0, + "eval_f1_for_task1157_bard_word_analogy": 26.0, + "eval_f1_for_task1158_bard_word_analogy": 24.6667, + "eval_f1_for_task1159_bard_word_analogy": 7.3333, + "eval_f1_for_task1161_coda_19_title_generation": 37.8996, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 69.4137, + "eval_f1_for_task121_atomic_question_rewriting": 52.9807, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7199, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 41.2743, + "eval_f1_for_task1356_xlsum_title_generation": 19.602, + "eval_f1_for_task1358_xlsum_title_generation": 33.4215, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 60.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 32.6618, + "eval_f1_for_task1409_dart_data_to_text": 50.5182, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.9327, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 44.6667, + "eval_f1_for_task1540_peer_read_title_generation": 34.5397, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1405, + "eval_f1_for_task1562_zest_question_rewriting": 57.5983, + "eval_f1_for_task1586_scifact_title_generation": 36.035, + "eval_f1_for_task1598_nyc_data_to_text": 15.1671, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 50.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 73.6632, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 1.0, + "eval_f1_for_task1631_open_pi_data_to_text": 41.5983, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_f1_for_task1659_billsum_title_generation": 52.3638, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 30.5, + "eval_f1_for_task1728_web_nlg_data_to_text": 52.4014, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 45.0, + "eval_f1_for_task200_multinli_textual_entailment": 68.0, + "eval_f1_for_task201_multinli_textual_entailment": 31.0, + "eval_f1_for_task202_multinli_textual_entailment": 29.0, + "eval_f1_for_task219_rocstories_title_generation": 18.4932, + "eval_f1_for_task220_rocstories_title_generation": 86.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 26.25, + "eval_f1_for_task233_iirc_answerability_classification": 25.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 53.2714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 21.6027, + "eval_f1_for_task288_gigaword_title_generation": 30.306, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.8333, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 66.8952, + "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 36.3531, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 14.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 78.0215, + "eval_f1_for_task418_persent_title_generation": 26.5382, + "eval_f1_for_task442_com_qa_question_rewriting": 57.9213, + "eval_f1_for_task500_scruples_title_generation": 18.7572, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 34.2426, + "eval_f1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.9751, + "eval_f1_for_task602_wikitext_title_generation": 17.3247, + "eval_f1_for_task613_liar_keyword_tagging": 16.119, + "eval_f1_for_task614_glucose_cause_effect_classification": 22.6821, + "eval_f1_for_task619_ohsumed_title_generation": 43.5774, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.7143, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 46.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 71.7667, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 66.8556, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.3013, + "eval_f1_for_task671_ambigqa_question_rewriting": 57.9884, + "eval_f1_for_task677_ollie_data_to_text": 33.8381, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 31.314, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.8749, + "eval_f1_for_task769_qed_title_generation": 80.0534, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_f1_for_task890_gwsd_textual_entailment": 58.0, + "eval_f1_for_task891_gap_coreference_resolution": 59.6667, + "eval_f1_for_task892_gap_coreference_resolution": 27.0, + "eval_f1_for_task893_gap_coreference_resolution": 29.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 53.4213, + "eval_f1_for_task970_sherliic_textual_entailment": 51.0, + "eval_f1_for_textual_entailment": 38.7917, + "eval_f1_for_title_generation": 36.1206, + "eval_f1_for_word_analogy": 19.7917, + "eval_gen_len": 8.3114, + "eval_global_step": 1000, + "eval_loss": 1.404618263244629, + "eval_rouge1": 44.1703, + "eval_rouge1_for_answerability_classification": 46.5064, + "eval_rouge1_for_cause_effect_classification": 56.6386, + "eval_rouge1_for_coreference_resolution": 44.9354, + "eval_rouge1_for_data_to_text": 43.0211, + "eval_rouge1_for_dialogue_act_recognition": 44.2476, + "eval_rouge1_for_grammar_error_correction": 62.3261, + "eval_rouge1_for_keyword_tagging": 52.93, + "eval_rouge1_for_overlap_extraction": 16.2778, + "eval_rouge1_for_question_rewriting": 59.1004, + "eval_rouge1_for_task020_mctaco_answerability_classification": 47.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 36.67, + "eval_rouge1_for_task035_winogrande_question_rewriting": 39.0919, + "eval_rouge1_for_task036_qasc_keyword_tagging": 76.5, + "eval_rouge1_for_task039_qasc_overlap_extraction": 11.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 55.0, + "eval_rouge1_for_task102_commongen_data_to_text": 60.5136, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 10.0, + "eval_rouge1_for_task1154_bard_word_analogy": 9.6667, + "eval_rouge1_for_task1155_bard_word_analogy": 41.6667, + "eval_rouge1_for_task1156_bard_word_analogy": 36.0, + "eval_rouge1_for_task1157_bard_word_analogy": 26.0, + "eval_rouge1_for_task1158_bard_word_analogy": 24.6667, + "eval_rouge1_for_task1159_bard_word_analogy": 7.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.8773, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 70.3725, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.5621, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2785, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.2985, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.5776, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.2753, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 60.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 72.9, + "eval_rouge1_for_task1407_dart_data_to_text": 33.8805, + "eval_rouge1_for_task1409_dart_data_to_text": 51.1566, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.3745, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 44.6667, + "eval_rouge1_for_task1540_peer_read_title_generation": 38.168, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.2777, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.4667, + "eval_rouge1_for_task1586_scifact_title_generation": 40.0848, + "eval_rouge1_for_task1598_nyc_data_to_text": 15.2063, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 74.2267, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 1.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 42.0863, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1659_billsum_title_generation": 54.1048, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 30.5, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.4504, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 68.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 29.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.3988, + "eval_rouge1_for_task220_rocstories_title_generation": 86.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 26.25, + "eval_rouge1_for_task233_iirc_answerability_classification": 25.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 54.1167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 21.5556, + "eval_rouge1_for_task288_gigaword_title_generation": 33.1599, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.5, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 66.8238, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 37.1795, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 14.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.0573, + "eval_rouge1_for_task418_persent_title_generation": 30.651, + "eval_rouge1_for_task442_com_qa_question_rewriting": 61.3933, + "eval_rouge1_for_task500_scruples_title_generation": 21.3689, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 34.3083, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 34.7083, + "eval_rouge1_for_task602_wikitext_title_generation": 17.4758, + "eval_rouge1_for_task613_liar_keyword_tagging": 20.3999, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 28.2907, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.8467, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.1833, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 46.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 73.5667, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 66.8222, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 70.136, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 58.8293, + "eval_rouge1_for_task677_ollie_data_to_text": 37.5724, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rouge1_for_task743_eurlex_title_generation": 33.7846, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.3721, + "eval_rouge1_for_task769_qed_title_generation": 79.541, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.1667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 58.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 59.6667, + "eval_rouge1_for_task892_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.3112, + "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, + "eval_rouge1_for_textual_entailment": 43.6806, + "eval_rouge1_for_title_generation": 38.4985, + "eval_rouge1_for_word_analogy": 19.7917, + "eval_rougeL": 42.714, + "eval_rougeL_for_answerability_classification": 46.5064, + "eval_rougeL_for_cause_effect_classification": 56.1336, + "eval_rougeL_for_coreference_resolution": 44.9354, + "eval_rougeL_for_data_to_text": 36.6769, + "eval_rougeL_for_dialogue_act_recognition": 44.2476, + "eval_rougeL_for_grammar_error_correction": 60.9595, + "eval_rougeL_for_keyword_tagging": 52.6462, + "eval_rougeL_for_overlap_extraction": 16.2778, + "eval_rougeL_for_question_rewriting": 54.5152, + "eval_rougeL_for_task020_mctaco_answerability_classification": 47.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 35.9356, + "eval_rougeL_for_task035_winogrande_question_rewriting": 37.1325, + "eval_rougeL_for_task036_qasc_keyword_tagging": 76.5, + "eval_rougeL_for_task039_qasc_overlap_extraction": 11.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 55.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.9238, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 10.0, + "eval_rougeL_for_task1154_bard_word_analogy": 9.6667, + "eval_rougeL_for_task1155_bard_word_analogy": 41.6667, + "eval_rougeL_for_task1156_bard_word_analogy": 36.0, + "eval_rougeL_for_task1157_bard_word_analogy": 26.0, + "eval_rougeL_for_task1158_bard_word_analogy": 24.6667, + "eval_rougeL_for_task1159_bard_word_analogy": 7.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 33.6947, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 67.4455, + "eval_rougeL_for_task121_atomic_question_rewriting": 51.193, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.903, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.9075, + "eval_rougeL_for_task1356_xlsum_title_generation": 20.2983, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.3677, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 62.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 60.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 72.9, + "eval_rougeL_for_task1407_dart_data_to_text": 27.5439, + "eval_rougeL_for_task1409_dart_data_to_text": 44.5259, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.5575, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 44.6667, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.0057, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.3615, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.405, + "eval_rougeL_for_task1586_scifact_title_generation": 33.2956, + "eval_rougeL_for_task1598_nyc_data_to_text": 12.6937, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 71.5265, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 1.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 39.8565, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1659_billsum_title_generation": 50.3884, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 30.5, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.1618, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 68.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 29.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.8631, + "eval_rougeL_for_task220_rocstories_title_generation": 86.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 26.25, + "eval_rougeL_for_task233_iirc_answerability_classification": 25.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 54.1167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 21.5556, + "eval_rougeL_for_task288_gigaword_title_generation": 29.4999, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.5, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 66.8238, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 35.7452, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 14.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.436, + "eval_rougeL_for_task418_persent_title_generation": 27.6039, + "eval_rougeL_for_task442_com_qa_question_rewriting": 54.4078, + "eval_rougeL_for_task500_scruples_title_generation": 18.4435, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 34.0084, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 31.8343, + "eval_rougeL_for_task602_wikitext_title_generation": 16.6821, + "eval_rougeL_for_task613_liar_keyword_tagging": 20.3999, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.1899, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.0573, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.7643, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 46.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 73.5667, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 66.8222, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 68.3835, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 56.8946, + "eval_rougeL_for_task677_ollie_data_to_text": 30.8649, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.5313, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.2824, + "eval_rougeL_for_task769_qed_title_generation": 79.0966, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.1667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 58.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 59.6667, + "eval_rougeL_for_task892_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.4868, + "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, + "eval_rougeL_for_textual_entailment": 43.6806, + "eval_rougeL_for_title_generation": 34.9722, + "eval_rougeL_for_word_analogy": 19.7917, + "eval_runtime": 893.9103, + "eval_samples_per_second": 13.323, + "eval_steps_per_second": 0.833, + "step": 1000 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 1.135, + "step": 1500 + }, + { + "epoch": 0.33, + "eval_exact_match": 27.0445, + "eval_exact_match_for_answerability_classification": 47.3077, + "eval_exact_match_for_cause_effect_classification": 42.8571, + "eval_exact_match_for_coreference_resolution": 34.8571, + "eval_exact_match_for_data_to_text": 0.7264, + "eval_exact_match_for_dialogue_act_recognition": 42.4286, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 34.8, + "eval_exact_match_for_overlap_extraction": 6.5, + "eval_exact_match_for_question_rewriting": 1.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 40.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 13.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 68.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 8.0, + "eval_exact_match_for_task1153_bard_word_analogy": 3.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 35.0, + "eval_exact_match_for_task1156_bard_word_analogy": 29.0, + "eval_exact_match_for_task1157_bard_word_analogy": 27.0, + "eval_exact_match_for_task1158_bard_word_analogy": 24.0, + "eval_exact_match_for_task1159_bard_word_analogy": 7.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 57.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 59.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 64.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 19.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 62.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 67.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 60.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1659_billsum_title_generation": 37.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 42.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 80.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 24.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 26.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 88.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 42.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 49.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 48.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 66.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 63.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 3.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 10.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 3.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 1.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 50.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 28.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 32.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 13.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 26.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 23.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 55.0, + "eval_exact_match_for_textual_entailment": 41.0833, + "eval_exact_match_for_title_generation": 9.5852, + "eval_exact_match_for_word_analogy": 17.5, + "eval_f1": 41.763, + "eval_f1_for_answerability_classification": 50.0769, + "eval_f1_for_cause_effect_classification": 56.6309, + "eval_f1_for_coreference_resolution": 39.831, + "eval_f1_for_data_to_text": 35.911, + "eval_f1_for_dialogue_act_recognition": 45.2143, + "eval_f1_for_grammar_error_correction": 65.409, + "eval_f1_for_keyword_tagging": 50.9872, + "eval_f1_for_overlap_extraction": 15.0989, + "eval_f1_for_question_rewriting": 53.839, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 42.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 20.1789, + "eval_f1_for_task035_winogrande_question_rewriting": 21.9741, + "eval_f1_for_task036_qasc_keyword_tagging": 68.3167, + "eval_f1_for_task039_qasc_overlap_extraction": 13.0, + "eval_f1_for_task050_multirc_answerability_classification": 68.0, + "eval_f1_for_task102_commongen_data_to_text": 49.6833, + "eval_f1_for_task1152_bard_word_analogy": 8.0, + "eval_f1_for_task1153_bard_word_analogy": 3.0, + "eval_f1_for_task1154_bard_word_analogy": 9.0, + "eval_f1_for_task1155_bard_word_analogy": 39.8333, + "eval_f1_for_task1156_bard_word_analogy": 32.3333, + "eval_f1_for_task1157_bard_word_analogy": 27.6667, + "eval_f1_for_task1158_bard_word_analogy": 24.0, + "eval_f1_for_task1159_bard_word_analogy": 10.3333, + "eval_f1_for_task1161_coda_19_title_generation": 35.8195, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 76.4335, + "eval_f1_for_task121_atomic_question_rewriting": 46.7595, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.8588, + "eval_f1_for_task1344_rte_textual_entailment": 57.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8963, + "eval_f1_for_task1356_xlsum_title_generation": 20.1508, + "eval_f1_for_task1358_xlsum_title_generation": 32.3978, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 24.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 59.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 64.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_f1_for_task1407_dart_data_to_text": 25.0301, + "eval_f1_for_task1409_dart_data_to_text": 49.658, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.989, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.6667, + "eval_f1_for_task1516_imppres_textual_entailment": 19.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 62.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1540_peer_read_title_generation": 29.7067, + "eval_f1_for_task1554_scitail_textual_entailment": 67.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.829, + "eval_f1_for_task1562_zest_question_rewriting": 57.3285, + "eval_f1_for_task1586_scifact_title_generation": 34.5895, + "eval_f1_for_task1598_nyc_data_to_text": 3.5413, + "eval_f1_for_task1612_sick_textual_entailment": 34.0, + "eval_f1_for_task1615_sick_textual_entailment": 60.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 73.3572, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 36.0664, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_f1_for_task1659_billsum_title_generation": 50.5107, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 8.11, + "eval_f1_for_task1728_web_nlg_data_to_text": 49.8442, + "eval_f1_for_task190_snli_textual_entailment": 42.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 80.0, + "eval_f1_for_task201_multinli_textual_entailment": 24.0, + "eval_f1_for_task202_multinli_textual_entailment": 26.0, + "eval_f1_for_task219_rocstories_title_generation": 19.1082, + "eval_f1_for_task220_rocstories_title_generation": 89.3333, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 43.0, + "eval_f1_for_task233_iirc_answerability_classification": 42.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 49.6667, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 60.7048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 17.1978, + "eval_f1_for_task288_gigaword_title_generation": 27.6779, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 5.8333, + "eval_f1_for_task329_gap_coreference_resolution": 48.0, + "eval_f1_for_task330_gap_coreference_resolution": 66.9302, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 88.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 37.8623, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 11.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 75.4699, + "eval_f1_for_task418_persent_title_generation": 25.7247, + "eval_f1_for_task442_com_qa_question_rewriting": 64.1634, + "eval_f1_for_task500_scruples_title_generation": 17.0168, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 35.8266, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 31.7341, + "eval_f1_for_task602_wikitext_title_generation": 13.9779, + "eval_f1_for_task613_liar_keyword_tagging": 11.8708, + "eval_f1_for_task614_glucose_cause_effect_classification": 14.2205, + "eval_f1_for_task619_ohsumed_title_generation": 41.9934, + "eval_f1_for_task620_ohsumed_keyword_tagging": 23.0869, + "eval_f1_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 40.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 71.6619, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 56.8222, + "eval_f1_for_task670_ambigqa_question_rewriting": 70.1564, + "eval_f1_for_task671_ambigqa_question_rewriting": 46.5111, + "eval_f1_for_task677_ollie_data_to_text": 33.2234, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 30.6475, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.4519, + "eval_f1_for_task769_qed_title_generation": 57.6086, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, + "eval_f1_for_task890_gwsd_textual_entailment": 13.0, + "eval_f1_for_task891_gap_coreference_resolution": 48.4, + "eval_f1_for_task892_gap_coreference_resolution": 26.0, + "eval_f1_for_task893_gap_coreference_resolution": 23.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 47.6407, + "eval_f1_for_task970_sherliic_textual_entailment": 55.0, + "eval_f1_for_textual_entailment": 41.0833, + "eval_f1_for_title_generation": 34.0497, + "eval_f1_for_word_analogy": 19.2708, + "eval_gen_len": 10.1588, + "eval_global_step": 1500, + "eval_loss": 1.463843584060669, + "eval_rouge1": 43.8561, + "eval_rouge1_for_answerability_classification": 50.0769, + "eval_rouge1_for_cause_effect_classification": 58.0482, + "eval_rouge1_for_coreference_resolution": 39.9947, + "eval_rouge1_for_data_to_text": 38.9209, + "eval_rouge1_for_dialogue_act_recognition": 47.5912, + "eval_rouge1_for_grammar_error_correction": 68.009, + "eval_rouge1_for_keyword_tagging": 54.2308, + "eval_rouge1_for_overlap_extraction": 18.342, + "eval_rouge1_for_question_rewriting": 55.3986, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 20.4451, + "eval_rouge1_for_task035_winogrande_question_rewriting": 22.5037, + "eval_rouge1_for_task036_qasc_keyword_tagging": 72.9803, + "eval_rouge1_for_task039_qasc_overlap_extraction": 19.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 68.0, + "eval_rouge1_for_task102_commongen_data_to_text": 59.2757, + "eval_rouge1_for_task1152_bard_word_analogy": 8.0, + "eval_rouge1_for_task1153_bard_word_analogy": 3.0, + "eval_rouge1_for_task1154_bard_word_analogy": 9.0, + "eval_rouge1_for_task1155_bard_word_analogy": 39.8333, + "eval_rouge1_for_task1156_bard_word_analogy": 33.0, + "eval_rouge1_for_task1157_bard_word_analogy": 27.6667, + "eval_rouge1_for_task1158_bard_word_analogy": 24.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 39.403, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 77.0446, + "eval_rouge1_for_task121_atomic_question_rewriting": 48.0068, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.3956, + "eval_rouge1_for_task1344_rte_textual_entailment": 57.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.7327, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.0359, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.6043, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 24.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 59.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 64.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 69.9714, + "eval_rouge1_for_task1407_dart_data_to_text": 27.24, + "eval_rouge1_for_task1409_dart_data_to_text": 50.3542, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 48.354, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.6667, + "eval_rouge1_for_task1516_imppres_textual_entailment": 19.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 62.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 32.9666, + "eval_rouge1_for_task1554_scitail_textual_entailment": 67.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.664, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.1892, + "eval_rouge1_for_task1586_scifact_title_generation": 39.2815, + "eval_rouge1_for_task1598_nyc_data_to_text": 3.6478, + "eval_rouge1_for_task1612_sick_textual_entailment": 34.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 86.5, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.8011, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 36.7415, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1659_billsum_title_generation": 52.4377, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 8.1011, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.2544, + "eval_rouge1_for_task190_snli_textual_entailment": 42.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 80.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 24.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 26.0, + "eval_rouge1_for_task219_rocstories_title_generation": 21.7989, + "eval_rouge1_for_task220_rocstories_title_generation": 89.3333, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 42.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 49.6667, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 61.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.684, + "eval_rouge1_for_task288_gigaword_title_generation": 30.6951, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 6.5, + "eval_rouge1_for_task329_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 66.8857, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 88.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 38.2791, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 11.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.6033, + "eval_rouge1_for_task418_persent_title_generation": 28.8732, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.6131, + "eval_rouge1_for_task500_scruples_title_generation": 18.8228, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.3343, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 33.8074, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3963, + "eval_rouge1_for_task613_liar_keyword_tagging": 18.3162, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 23.7246, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.3409, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 27.1434, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 72.7143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 57.3222, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 71.1598, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 47.2851, + "eval_rouge1_for_task677_ollie_data_to_text": 36.6769, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 83.0, + "eval_rouge1_for_task743_eurlex_title_generation": 33.0857, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.6801, + "eval_rouge1_for_task769_qed_title_generation": 57.4888, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 13.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 48.7333, + "eval_rouge1_for_task892_gap_coreference_resolution": 26.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 23.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.2995, + "eval_rouge1_for_task970_sherliic_textual_entailment": 55.0, + "eval_rouge1_for_textual_entailment": 45.6458, + "eval_rouge1_for_title_generation": 36.3115, + "eval_rouge1_for_word_analogy": 19.3542, + "eval_rougeL": 42.4819, + "eval_rougeL_for_answerability_classification": 50.0769, + "eval_rougeL_for_cause_effect_classification": 57.6264, + "eval_rougeL_for_coreference_resolution": 39.9568, + "eval_rougeL_for_data_to_text": 33.0632, + "eval_rougeL_for_dialogue_act_recognition": 47.5912, + "eval_rougeL_for_grammar_error_correction": 66.5441, + "eval_rougeL_for_keyword_tagging": 54.0151, + "eval_rougeL_for_overlap_extraction": 18.1056, + "eval_rougeL_for_question_rewriting": 51.1252, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 20.0999, + "eval_rougeL_for_task035_winogrande_question_rewriting": 21.3381, + "eval_rougeL_for_task036_qasc_keyword_tagging": 72.7985, + "eval_rougeL_for_task039_qasc_overlap_extraction": 19.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 68.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.0639, + "eval_rougeL_for_task1152_bard_word_analogy": 8.0, + "eval_rougeL_for_task1153_bard_word_analogy": 3.0, + "eval_rougeL_for_task1154_bard_word_analogy": 9.0, + "eval_rougeL_for_task1155_bard_word_analogy": 39.8333, + "eval_rougeL_for_task1156_bard_word_analogy": 33.0, + "eval_rougeL_for_task1157_bard_word_analogy": 27.6667, + "eval_rougeL_for_task1158_bard_word_analogy": 24.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 32.5368, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.6982, + "eval_rougeL_for_task121_atomic_question_rewriting": 44.0962, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6876, + "eval_rougeL_for_task1344_rte_textual_entailment": 57.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.4301, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.7265, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2176, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 24.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 59.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 64.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 69.9714, + "eval_rougeL_for_task1407_dart_data_to_text": 21.8292, + "eval_rougeL_for_task1409_dart_data_to_text": 42.7068, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.3404, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.6667, + "eval_rougeL_for_task1516_imppres_textual_entailment": 19.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 62.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.3566, + "eval_rougeL_for_task1554_scitail_textual_entailment": 67.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7478, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.1571, + "eval_rougeL_for_task1586_scifact_title_generation": 31.263, + "eval_rougeL_for_task1598_nyc_data_to_text": 3.5409, + "eval_rougeL_for_task1612_sick_textual_entailment": 34.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 86.5, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 70.9122, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 34.2644, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1659_billsum_title_generation": 49.113, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 7.571, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.7462, + "eval_rougeL_for_task190_snli_textual_entailment": 42.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 80.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 24.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 26.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.5132, + "eval_rougeL_for_task220_rocstories_title_generation": 89.3333, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 42.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 49.6667, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 61.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.2112, + "eval_rougeL_for_task288_gigaword_title_generation": 26.8016, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 6.5, + "eval_rougeL_for_task329_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 66.8857, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 78.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 88.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 36.8672, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 11.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.0989, + "eval_rougeL_for_task418_persent_title_generation": 25.5417, + "eval_rougeL_for_task442_com_qa_question_rewriting": 62.4237, + "eval_rougeL_for_task500_scruples_title_generation": 16.3674, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 35.83, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 30.1072, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3712, + "eval_rougeL_for_task613_liar_keyword_tagging": 18.3162, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.1845, + "eval_rougeL_for_task619_ohsumed_title_generation": 37.8239, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 26.2463, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 72.7143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 57.3222, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 69.6292, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 45.4932, + "eval_rougeL_for_task677_ollie_data_to_text": 29.8137, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 83.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.704, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.7056, + "eval_rougeL_for_task769_qed_title_generation": 57.4888, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 13.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 48.7333, + "eval_rougeL_for_task892_gap_coreference_resolution": 26.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 23.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 38.3935, + "eval_rougeL_for_task970_sherliic_textual_entailment": 55.0, + "eval_rougeL_for_textual_entailment": 45.6458, + "eval_rougeL_for_title_generation": 32.9307, + "eval_rougeL_for_word_analogy": 19.3542, + "eval_runtime": 1117.7747, + "eval_samples_per_second": 10.655, + "eval_steps_per_second": 0.667, + "step": 1500 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.0948, + "step": 2000 + }, + { + "epoch": 0.44, + "eval_exact_match": 28.0856, + "eval_exact_match_for_answerability_classification": 44.4615, + "eval_exact_match_for_cause_effect_classification": 45.8571, + "eval_exact_match_for_coreference_resolution": 40.4286, + "eval_exact_match_for_data_to_text": 0.7264, + "eval_exact_match_for_dialogue_act_recognition": 33.1429, + "eval_exact_match_for_grammar_error_correction": 9.0, + "eval_exact_match_for_keyword_tagging": 44.0, + "eval_exact_match_for_overlap_extraction": 3.0, + "eval_exact_match_for_question_rewriting": 2.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 46.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 6.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 5.0, + "eval_exact_match_for_task1153_bard_word_analogy": 1.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 60.0, + "eval_exact_match_for_task1156_bard_word_analogy": 29.0, + "eval_exact_match_for_task1157_bard_word_analogy": 40.0, + "eval_exact_match_for_task1158_bard_word_analogy": 21.0, + "eval_exact_match_for_task1159_bard_word_analogy": 11.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 46.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 40.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 73.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 27.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 64.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 70.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 18.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 36.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 79.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 19.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 13.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 7.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 25.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 65.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 62.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 65.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 1.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 11.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 58.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 35.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 75.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 57.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 72.0, + "eval_exact_match_for_textual_entailment": 40.625, + "eval_exact_match_for_title_generation": 12.2197, + "eval_exact_match_for_word_analogy": 22.5, + "eval_f1": 43.5637, + "eval_f1_for_answerability_classification": 47.0769, + "eval_f1_for_cause_effect_classification": 61.9583, + "eval_f1_for_coreference_resolution": 46.3415, + "eval_f1_for_data_to_text": 36.0778, + "eval_f1_for_dialogue_act_recognition": 33.1429, + "eval_f1_for_grammar_error_correction": 58.9413, + "eval_f1_for_keyword_tagging": 60.2381, + "eval_f1_for_overlap_extraction": 11.1656, + "eval_f1_for_question_rewriting": 65.0036, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 58.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 67.0383, + "eval_f1_for_task035_winogrande_question_rewriting": 77.5228, + "eval_f1_for_task036_qasc_keyword_tagging": 73.4524, + "eval_f1_for_task039_qasc_overlap_extraction": 6.6667, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 50.7815, + "eval_f1_for_task1152_bard_word_analogy": 6.1667, + "eval_f1_for_task1153_bard_word_analogy": 5.1667, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 60.0, + "eval_f1_for_task1156_bard_word_analogy": 32.1667, + "eval_f1_for_task1157_bard_word_analogy": 40.0, + "eval_f1_for_task1158_bard_word_analogy": 21.6667, + "eval_f1_for_task1159_bard_word_analogy": 12.3333, + "eval_f1_for_task1161_coda_19_title_generation": 34.1391, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 77.0508, + "eval_f1_for_task121_atomic_question_rewriting": 52.488, + "eval_f1_for_task133_winowhy_coreference_resolution": 46.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0956, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.0196, + "eval_f1_for_task1356_xlsum_title_generation": 20.5989, + "eval_f1_for_task1358_xlsum_title_generation": 32.7497, + "eval_f1_for_task1385_anli_textual_entailment": 31.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 40.0, + "eval_f1_for_task1388_cb_textual_entailment": 24.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 73.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, + "eval_f1_for_task1407_dart_data_to_text": 27.2187, + "eval_f1_for_task1409_dart_data_to_text": 46.5285, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.3419, + "eval_f1_for_task1439_doqa_answerability_classification": 58.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 27.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 64.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 31.1466, + "eval_f1_for_task1554_scitail_textual_entailment": 70.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5406, + "eval_f1_for_task1562_zest_question_rewriting": 56.9871, + "eval_f1_for_task1586_scifact_title_generation": 34.2748, + "eval_f1_for_task1598_nyc_data_to_text": 5.3892, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.426, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 36.6393, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 50.0305, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 28.7039, + "eval_f1_for_task1728_web_nlg_data_to_text": 50.3486, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 38.0, + "eval_f1_for_task200_multinli_textual_entailment": 79.0, + "eval_f1_for_task201_multinli_textual_entailment": 19.0, + "eval_f1_for_task202_multinli_textual_entailment": 13.0, + "eval_f1_for_task219_rocstories_title_generation": 15.8514, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 7.0, + "eval_f1_for_task233_iirc_answerability_classification": 25.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 65.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 71.9, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 15.6646, + "eval_f1_for_task288_gigaword_title_generation": 27.163, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_f1_for_task329_gap_coreference_resolution": 55.0, + "eval_f1_for_task330_gap_coreference_resolution": 63.7016, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 1.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 38.1486, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 12.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 76.9609, + "eval_f1_for_task418_persent_title_generation": 24.6974, + "eval_f1_for_task442_com_qa_question_rewriting": 63.6273, + "eval_f1_for_task500_scruples_title_generation": 18.6742, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.8836, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 37.395, + "eval_f1_for_task602_wikitext_title_generation": 11.6962, + "eval_f1_for_task613_liar_keyword_tagging": 20.9167, + "eval_f1_for_task614_glucose_cause_effect_classification": 20.8926, + "eval_f1_for_task619_ohsumed_title_generation": 42.5094, + "eval_f1_for_task620_ohsumed_keyword_tagging": 45.0167, + "eval_f1_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_f1_for_task640_e_snli_textual_entailment": 36.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 81.8048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 60.1569, + "eval_f1_for_task670_ambigqa_question_rewriting": 69.0823, + "eval_f1_for_task671_ambigqa_question_rewriting": 55.837, + "eval_f1_for_task677_ollie_data_to_text": 35.7656, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 30.5989, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.0872, + "eval_f1_for_task769_qed_title_generation": 83.3667, + "eval_f1_for_task827_copa_cause_effect_classification": 75.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 57.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.319, + "eval_f1_for_task892_gap_coreference_resolution": 43.0, + "eval_f1_for_task893_gap_coreference_resolution": 40.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 43.7484, + "eval_f1_for_task970_sherliic_textual_entailment": 72.0, + "eval_f1_for_textual_entailment": 40.625, + "eval_f1_for_title_generation": 35.9865, + "eval_f1_for_word_analogy": 23.8125, + "eval_gen_len": 7.9311, + "eval_global_step": 2000, + "eval_loss": 1.4544899463653564, + "eval_rouge1": 45.6123, + "eval_rouge1_for_answerability_classification": 47.0769, + "eval_rouge1_for_cause_effect_classification": 62.4959, + "eval_rouge1_for_coreference_resolution": 46.7179, + "eval_rouge1_for_data_to_text": 38.1398, + "eval_rouge1_for_dialogue_act_recognition": 35.5503, + "eval_rouge1_for_grammar_error_correction": 61.2914, + "eval_rouge1_for_keyword_tagging": 63.9276, + "eval_rouge1_for_overlap_extraction": 13.09, + "eval_rouge1_for_question_rewriting": 66.685, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 67.2264, + "eval_rouge1_for_task035_winogrande_question_rewriting": 78.6819, + "eval_rouge1_for_task036_qasc_keyword_tagging": 79.1667, + "eval_rouge1_for_task039_qasc_overlap_extraction": 9.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 59.0306, + "eval_rouge1_for_task1152_bard_word_analogy": 6.1667, + "eval_rouge1_for_task1153_bard_word_analogy": 5.1667, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 60.0, + "eval_rouge1_for_task1156_bard_word_analogy": 32.8333, + "eval_rouge1_for_task1157_bard_word_analogy": 40.0, + "eval_rouge1_for_task1158_bard_word_analogy": 21.6667, + "eval_rouge1_for_task1159_bard_word_analogy": 12.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.992, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.0555, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.2465, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 46.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5889, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.488, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.1531, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.2659, + "eval_rouge1_for_task1385_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 40.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 24.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 73.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 70.1857, + "eval_rouge1_for_task1407_dart_data_to_text": 28.1319, + "eval_rouge1_for_task1409_dart_data_to_text": 47.3104, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.0261, + "eval_rouge1_for_task1439_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 27.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 64.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 34.701, + "eval_rouge1_for_task1554_scitail_textual_entailment": 70.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.5567, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.9373, + "eval_rouge1_for_task1586_scifact_title_generation": 38.4631, + "eval_rouge1_for_task1598_nyc_data_to_text": 5.3278, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.8, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.2887, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 37.4578, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 51.569, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 28.7039, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 52.6456, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 79.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 19.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 13.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.9943, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 7.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 25.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 65.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 72.5667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 16.5133, + "eval_rouge1_for_task288_gigaword_title_generation": 30.4248, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 63.4905, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 1.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 39.0234, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 15.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.9286, + "eval_rouge1_for_task418_persent_title_generation": 28.2464, + "eval_rouge1_for_task442_com_qa_question_rewriting": 66.8415, + "eval_rouge1_for_task500_scruples_title_generation": 20.5016, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.2507, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.0703, + "eval_rouge1_for_task602_wikitext_title_generation": 12.7239, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.3167, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 23.7809, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.1415, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.5833, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 82.5714, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 61.7378, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 70.2232, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 56.6176, + "eval_rouge1_for_task677_ollie_data_to_text": 38.9473, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.7951, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4112, + "eval_rouge1_for_task769_qed_title_generation": 83.2661, + "eval_rouge1_for_task827_copa_cause_effect_classification": 75.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 57.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 51.5524, + "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 44.5162, + "eval_rouge1_for_task970_sherliic_textual_entailment": 72.0, + "eval_rouge1_for_textual_entailment": 45.325, + "eval_rouge1_for_title_generation": 38.3471, + "eval_rouge1_for_word_analogy": 23.8958, + "eval_rougeL": 44.2189, + "eval_rougeL_for_answerability_classification": 47.0769, + "eval_rougeL_for_cause_effect_classification": 62.1896, + "eval_rougeL_for_coreference_resolution": 46.6941, + "eval_rougeL_for_data_to_text": 32.9255, + "eval_rougeL_for_dialogue_act_recognition": 35.5503, + "eval_rougeL_for_grammar_error_correction": 59.6391, + "eval_rougeL_for_keyword_tagging": 63.5276, + "eval_rougeL_for_overlap_extraction": 12.879, + "eval_rougeL_for_question_rewriting": 61.3873, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 65.3296, + "eval_rougeL_for_task035_winogrande_question_rewriting": 69.1397, + "eval_rougeL_for_task036_qasc_keyword_tagging": 79.1667, + "eval_rougeL_for_task039_qasc_overlap_extraction": 9.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.6762, + "eval_rougeL_for_task1152_bard_word_analogy": 6.1667, + "eval_rougeL_for_task1153_bard_word_analogy": 5.1667, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 60.0, + "eval_rougeL_for_task1156_bard_word_analogy": 32.8333, + "eval_rougeL_for_task1157_bard_word_analogy": 40.0, + "eval_rougeL_for_task1158_bard_word_analogy": 21.6667, + "eval_rougeL_for_task1159_bard_word_analogy": 12.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.368, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 75.2065, + "eval_rougeL_for_task121_atomic_question_rewriting": 49.8899, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 46.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.3015, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.6174, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.145, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.5105, + "eval_rougeL_for_task1385_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 40.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 24.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 73.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 70.1857, + "eval_rougeL_for_task1407_dart_data_to_text": 22.5194, + "eval_rougeL_for_task1409_dart_data_to_text": 40.3647, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.5865, + "eval_rougeL_for_task1439_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 27.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 64.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.7346, + "eval_rougeL_for_task1554_scitail_textual_entailment": 70.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.6918, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.7073, + "eval_rougeL_for_task1586_scifact_title_generation": 32.0368, + "eval_rougeL_for_task1598_nyc_data_to_text": 5.3278, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.8, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.0446, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 35.1409, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 48.3004, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 28.7039, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 46.552, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 79.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 19.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 13.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.5419, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 7.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 25.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 65.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 72.5667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 16.0914, + "eval_rougeL_for_task288_gigaword_title_generation": 26.4914, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 4.3333, + "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 63.4905, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 1.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 38.1599, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 15.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.3457, + "eval_rougeL_for_task418_persent_title_generation": 25.0231, + "eval_rougeL_for_task442_com_qa_question_rewriting": 60.6457, + "eval_rougeL_for_task500_scruples_title_generation": 18.5356, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.9754, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 37.4046, + "eval_rougeL_for_task602_wikitext_title_generation": 12.7239, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.3167, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.5008, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.06, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.5833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 80.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 82.5714, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 61.4045, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 68.6738, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 54.6595, + "eval_rougeL_for_task677_ollie_data_to_text": 31.9876, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.7303, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.4572, + "eval_rougeL_for_task769_qed_title_generation": 82.8661, + "eval_rougeL_for_task827_copa_cause_effect_classification": 75.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 57.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 51.5524, + "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 35.9775, + "eval_rougeL_for_task970_sherliic_textual_entailment": 72.0, + "eval_rougeL_for_textual_entailment": 45.325, + "eval_rougeL_for_title_generation": 35.1857, + "eval_rougeL_for_word_analogy": 23.8958, + "eval_runtime": 823.1973, + "eval_samples_per_second": 14.468, + "eval_steps_per_second": 0.905, + "step": 2000 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 1.0538, + "step": 2500 + }, + { + "epoch": 0.55, + "eval_exact_match": 29.6641, + "eval_exact_match_for_answerability_classification": 56.0, + "eval_exact_match_for_cause_effect_classification": 45.2857, + "eval_exact_match_for_coreference_resolution": 39.7857, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 42.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 39.2, + "eval_exact_match_for_overlap_extraction": 3.5, + "eval_exact_match_for_question_rewriting": 1.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 56.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 41.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 7.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 71.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 8.0, + "eval_exact_match_for_task1154_bard_word_analogy": 8.0, + "eval_exact_match_for_task1155_bard_word_analogy": 64.0, + "eval_exact_match_for_task1156_bard_word_analogy": 41.0, + "eval_exact_match_for_task1157_bard_word_analogy": 42.0, + "eval_exact_match_for_task1158_bard_word_analogy": 24.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 1.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 49.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 17.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 24.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 23.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 4.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 68.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 61.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 72.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 73.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 47.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 63.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 10.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 74.0, + "eval_exact_match_for_task1659_billsum_title_generation": 37.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 24.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 85.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 18.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 48.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 63.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 60.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 70.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 1.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 29.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 88.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 8.3333, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 21.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 57.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 43.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 75.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 62.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 18.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 1.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, + "eval_exact_match_for_textual_entailment": 40.4167, + "eval_exact_match_for_title_generation": 12.2758, + "eval_exact_match_for_word_analogy": 25.375, + "eval_f1": 45.7022, + "eval_f1_for_answerability_classification": 58.6154, + "eval_f1_for_cause_effect_classification": 59.8424, + "eval_f1_for_coreference_resolution": 45.6955, + "eval_f1_for_data_to_text": 37.4697, + "eval_f1_for_dialogue_act_recognition": 45.3333, + "eval_f1_for_grammar_error_correction": 66.9318, + "eval_f1_for_keyword_tagging": 54.7925, + "eval_f1_for_overlap_extraction": 12.1329, + "eval_f1_for_question_rewriting": 65.975, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 58.5, + "eval_f1_for_task034_winogrande_question_rewriting": 65.2437, + "eval_f1_for_task035_winogrande_question_rewriting": 76.1833, + "eval_f1_for_task036_qasc_keyword_tagging": 66.969, + "eval_f1_for_task039_qasc_overlap_extraction": 7.0, + "eval_f1_for_task050_multirc_answerability_classification": 71.0, + "eval_f1_for_task102_commongen_data_to_text": 52.5312, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 8.0, + "eval_f1_for_task1154_bard_word_analogy": 8.6667, + "eval_f1_for_task1155_bard_word_analogy": 64.0, + "eval_f1_for_task1156_bard_word_analogy": 42.3333, + "eval_f1_for_task1157_bard_word_analogy": 42.0, + "eval_f1_for_task1158_bard_word_analogy": 24.0, + "eval_f1_for_task1159_bard_word_analogy": 19.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.2495, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5074, + "eval_f1_for_task121_atomic_question_rewriting": 52.4005, + "eval_f1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.8902, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.9046, + "eval_f1_for_task1356_xlsum_title_generation": 20.7411, + "eval_f1_for_task1358_xlsum_title_generation": 31.4441, + "eval_f1_for_task1385_anli_textual_entailment": 17.0, + "eval_f1_for_task1386_anli_textual_entailment": 24.0, + "eval_f1_for_task1387_anli_textual_entailment": 23.0, + "eval_f1_for_task1388_cb_textual_entailment": 4.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 68.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, + "eval_f1_for_task1407_dart_data_to_text": 25.5117, + "eval_f1_for_task1409_dart_data_to_text": 47.8484, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 49.3611, + "eval_f1_for_task1439_doqa_answerability_classification": 61.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 35.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 72.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 32.7628, + "eval_f1_for_task1554_scitail_textual_entailment": 73.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5024, + "eval_f1_for_task1562_zest_question_rewriting": 57.2094, + "eval_f1_for_task1586_scifact_title_generation": 35.5944, + "eval_f1_for_task1598_nyc_data_to_text": 8.1219, + "eval_f1_for_task1612_sick_textual_entailment": 47.0, + "eval_f1_for_task1615_sick_textual_entailment": 63.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.6584, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 37.0557, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 74.0, + "eval_f1_for_task1659_billsum_title_generation": 50.3819, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 23.7564, + "eval_f1_for_task1728_web_nlg_data_to_text": 53.5025, + "eval_f1_for_task190_snli_textual_entailment": 24.0, + "eval_f1_for_task199_multinli_textual_entailment": 37.0, + "eval_f1_for_task200_multinli_textual_entailment": 85.0, + "eval_f1_for_task201_multinli_textual_entailment": 18.0, + "eval_f1_for_task202_multinli_textual_entailment": 48.0, + "eval_f1_for_task219_rocstories_title_generation": 20.0054, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 63.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 71.7048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 17.2658, + "eval_f1_for_task288_gigaword_title_generation": 26.9029, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_f1_for_task329_gap_coreference_resolution": 50.0, + "eval_f1_for_task330_gap_coreference_resolution": 63.5238, + "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 36.7993, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 76.4883, + "eval_f1_for_task418_persent_title_generation": 26.596, + "eval_f1_for_task442_com_qa_question_rewriting": 69.2548, + "eval_f1_for_task500_scruples_title_generation": 18.4243, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.5722, + "eval_f1_for_task520_aquamuse_answerability_classification": 88.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.1066, + "eval_f1_for_task602_wikitext_title_generation": 14.316, + "eval_f1_for_task613_liar_keyword_tagging": 17.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 6.7641, + "eval_f1_for_task619_ohsumed_title_generation": 45.6018, + "eval_f1_for_task620_ohsumed_keyword_tagging": 44.219, + "eval_f1_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_f1_for_task640_e_snli_textual_entailment": 21.0, + "eval_f1_for_task641_e_snli_textual_entailment": 57.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 66.1079, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 56.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.1127, + "eval_f1_for_task671_ambigqa_question_rewriting": 58.7619, + "eval_f1_for_task677_ollie_data_to_text": 34.7252, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 30.8468, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.9141, + "eval_f1_for_task769_qed_title_generation": 83.7073, + "eval_f1_for_task827_copa_cause_effect_classification": 75.0, + "eval_f1_for_task828_copa_cause_effect_classification": 62.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.8333, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 18.0, + "eval_f1_for_task890_gwsd_textual_entailment": 1.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.0857, + "eval_f1_for_task892_gap_coreference_resolution": 30.0, + "eval_f1_for_task893_gap_coreference_resolution": 43.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task957_e2e_data_to_text": 48.4052, + "eval_f1_for_task970_sherliic_textual_entailment": 71.0, + "eval_f1_for_textual_entailment": 40.4167, + "eval_f1_for_title_generation": 36.8191, + "eval_f1_for_word_analogy": 26.375, + "eval_gen_len": 8.0519, + "eval_global_step": 2500, + "eval_loss": 1.4491918087005615, + "eval_rouge1": 48.5459, + "eval_rouge1_for_answerability_classification": 58.6154, + "eval_rouge1_for_cause_effect_classification": 61.6756, + "eval_rouge1_for_coreference_resolution": 45.8405, + "eval_rouge1_for_data_to_text": 39.7953, + "eval_rouge1_for_dialogue_act_recognition": 48.4456, + "eval_rouge1_for_grammar_error_correction": 69.5816, + "eval_rouge1_for_keyword_tagging": 57.706, + "eval_rouge1_for_overlap_extraction": 13.6243, + "eval_rouge1_for_question_rewriting": 67.5316, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 58.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 65.3873, + "eval_rouge1_for_task035_winogrande_question_rewriting": 77.0968, + "eval_rouge1_for_task036_qasc_keyword_tagging": 68.219, + "eval_rouge1_for_task039_qasc_overlap_extraction": 8.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 71.0, + "eval_rouge1_for_task102_commongen_data_to_text": 61.2509, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 8.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.6667, + "eval_rouge1_for_task1155_bard_word_analogy": 64.0, + "eval_rouge1_for_task1156_bard_word_analogy": 42.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 42.0, + "eval_rouge1_for_task1158_bard_word_analogy": 24.0, + "eval_rouge1_for_task1159_bard_word_analogy": 19.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.7674, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.738, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.2213, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.608, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.0953, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.5292, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.504, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 43.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 36.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 68.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 74.7857, + "eval_rouge1_for_task1407_dart_data_to_text": 25.8909, + "eval_rouge1_for_task1409_dart_data_to_text": 49.2145, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 51.5078, + "eval_rouge1_for_task1439_doqa_answerability_classification": 61.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 72.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.4153, + "eval_rouge1_for_task1554_scitail_textual_entailment": 73.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6553, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.0891, + "eval_rouge1_for_task1586_scifact_title_generation": 39.5499, + "eval_rouge1_for_task1598_nyc_data_to_text": 8.1168, + "eval_rouge1_for_task1612_sick_textual_entailment": 47.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 87.5, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.1903, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 38.5406, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 74.0, + "eval_rouge1_for_task1659_billsum_title_generation": 52.21, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 23.9786, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.5189, + "eval_rouge1_for_task190_snli_textual_entailment": 24.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 85.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 18.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 48.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.8755, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 63.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 72.05, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 18.5819, + "eval_rouge1_for_task288_gigaword_title_generation": 30.509, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 63.2857, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 37.6614, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 31.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.8154, + "eval_rouge1_for_task418_persent_title_generation": 30.7567, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.4182, + "eval_rouge1_for_task500_scruples_title_generation": 21.0998, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.9162, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 88.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.3585, + "eval_rouge1_for_task602_wikitext_title_generation": 15.754, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.4648, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 18.7348, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.8739, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 50.4714, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 57.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 66.3746, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 55.9667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.1599, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.6361, + "eval_rouge1_for_task677_ollie_data_to_text": 38.0813, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.6571, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.3123, + "eval_rouge1_for_task769_qed_title_generation": 83.3404, + "eval_rouge1_for_task827_copa_cause_effect_classification": 75.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 62.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.8333, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 1.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.419, + "eval_rouge1_for_task892_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 43.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.1938, + "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, + "eval_rouge1_for_textual_entailment": 48.6042, + "eval_rouge1_for_title_generation": 39.4173, + "eval_rouge1_for_word_analogy": 26.375, + "eval_rougeL": 47.1227, + "eval_rougeL_for_answerability_classification": 58.6154, + "eval_rougeL_for_cause_effect_classification": 61.2842, + "eval_rougeL_for_coreference_resolution": 45.8405, + "eval_rougeL_for_data_to_text": 33.8159, + "eval_rougeL_for_dialogue_act_recognition": 48.4456, + "eval_rougeL_for_grammar_error_correction": 68.3116, + "eval_rougeL_for_keyword_tagging": 57.4822, + "eval_rougeL_for_overlap_extraction": 13.2309, + "eval_rougeL_for_question_rewriting": 62.4855, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 58.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 61.4866, + "eval_rougeL_for_task035_winogrande_question_rewriting": 68.9048, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.219, + "eval_rougeL_for_task039_qasc_overlap_extraction": 8.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 71.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.0177, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 8.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.6667, + "eval_rougeL_for_task1155_bard_word_analogy": 64.0, + "eval_rougeL_for_task1156_bard_word_analogy": 42.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 42.0, + "eval_rougeL_for_task1158_bard_word_analogy": 24.0, + "eval_rougeL_for_task1159_bard_word_analogy": 19.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.8963, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.4676, + "eval_rougeL_for_task121_atomic_question_rewriting": 50.2977, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 49.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.4383, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8236, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.2159, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.4913, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 43.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 36.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 61.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 68.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 74.7857, + "eval_rougeL_for_task1407_dart_data_to_text": 20.6254, + "eval_rougeL_for_task1409_dart_data_to_text": 42.1272, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 49.8334, + "eval_rougeL_for_task1439_doqa_answerability_classification": 61.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 72.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.3382, + "eval_rougeL_for_task1554_scitail_textual_entailment": 73.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7899, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.9726, + "eval_rougeL_for_task1586_scifact_title_generation": 33.578, + "eval_rougeL_for_task1598_nyc_data_to_text": 7.3695, + "eval_rougeL_for_task1612_sick_textual_entailment": 47.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 87.5, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 74.559, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 35.8269, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 74.0, + "eval_rougeL_for_task1659_billsum_title_generation": 49.1447, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 23.9786, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.0846, + "eval_rougeL_for_task190_snli_textual_entailment": 24.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 85.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 18.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 48.0, + "eval_rougeL_for_task219_rocstories_title_generation": 22.447, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 63.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 72.05, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.7952, + "eval_rougeL_for_task288_gigaword_title_generation": 26.5712, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 63.2857, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 84.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 36.3801, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 31.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.6222, + "eval_rougeL_for_task418_persent_title_generation": 26.981, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.8331, + "eval_rougeL_for_task500_scruples_title_generation": 19.1538, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.4781, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 88.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.8949, + "eval_rougeL_for_task602_wikitext_title_generation": 15.754, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.4648, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 17.2763, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.4131, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 49.3524, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 79.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 57.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 66.3746, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 55.9667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.0328, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.3399, + "eval_rougeL_for_task677_ollie_data_to_text": 31.8896, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 81.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.5627, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.3044, + "eval_rougeL_for_task769_qed_title_generation": 82.9404, + "eval_rougeL_for_task827_copa_cause_effect_classification": 75.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 62.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.8333, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 1.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.419, + "eval_rougeL_for_task892_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 43.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.7395, + "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, + "eval_rougeL_for_textual_entailment": 48.6042, + "eval_rougeL_for_title_generation": 36.1983, + "eval_rougeL_for_word_analogy": 26.375, + "eval_runtime": 766.7958, + "eval_samples_per_second": 15.532, + "eval_steps_per_second": 0.972, + "step": 2500 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 1.069, + "step": 3000 + }, + { + "epoch": 0.66, + "eval_exact_match": 27.733, + "eval_exact_match_for_answerability_classification": 53.9231, + "eval_exact_match_for_cause_effect_classification": 47.2857, + "eval_exact_match_for_coreference_resolution": 40.0714, + "eval_exact_match_for_data_to_text": 0.7264, + "eval_exact_match_for_dialogue_act_recognition": 33.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 35.4, + "eval_exact_match_for_overlap_extraction": 2.0, + "eval_exact_match_for_question_rewriting": 2.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 4.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 73.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 12.0, + "eval_exact_match_for_task1153_bard_word_analogy": 1.0, + "eval_exact_match_for_task1154_bard_word_analogy": 7.0, + "eval_exact_match_for_task1155_bard_word_analogy": 62.0, + "eval_exact_match_for_task1156_bard_word_analogy": 26.0, + "eval_exact_match_for_task1157_bard_word_analogy": 36.0, + "eval_exact_match_for_task1158_bard_word_analogy": 22.0, + "eval_exact_match_for_task1159_bard_word_analogy": 6.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 13.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 48.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 8.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 11.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 16.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 3.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 68.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 75.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 41.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 64.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 62.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_exact_match_for_task1659_billsum_title_generation": 33.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 80.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 16.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 63.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 62.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 59.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 45.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 56.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 62.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 2.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 87.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 7.1429, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 16.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 45.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 41.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 44.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 83.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 2.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 77.0, + "eval_exact_match_for_textual_entailment": 36.5833, + "eval_exact_match_for_title_generation": 10.7623, + "eval_exact_match_for_word_analogy": 21.5, + "eval_f1": 41.9767, + "eval_f1_for_answerability_classification": 56.7436, + "eval_f1_for_cause_effect_classification": 62.8429, + "eval_f1_for_coreference_resolution": 45.144, + "eval_f1_for_data_to_text": 36.2651, + "eval_f1_for_dialogue_act_recognition": 36.7271, + "eval_f1_for_grammar_error_correction": 65.2569, + "eval_f1_for_keyword_tagging": 51.9556, + "eval_f1_for_overlap_extraction": 8.7249, + "eval_f1_for_question_rewriting": 49.0399, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 54.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 8.9585, + "eval_f1_for_task035_winogrande_question_rewriting": 25.6432, + "eval_f1_for_task036_qasc_keyword_tagging": 54.9024, + "eval_f1_for_task039_qasc_overlap_extraction": 4.0, + "eval_f1_for_task050_multirc_answerability_classification": 73.0, + "eval_f1_for_task102_commongen_data_to_text": 50.3418, + "eval_f1_for_task1152_bard_word_analogy": 12.6667, + "eval_f1_for_task1153_bard_word_analogy": 1.0, + "eval_f1_for_task1154_bard_word_analogy": 7.0, + "eval_f1_for_task1155_bard_word_analogy": 62.0, + "eval_f1_for_task1156_bard_word_analogy": 34.0, + "eval_f1_for_task1157_bard_word_analogy": 36.0, + "eval_f1_for_task1158_bard_word_analogy": 22.0, + "eval_f1_for_task1159_bard_word_analogy": 9.3333, + "eval_f1_for_task1161_coda_19_title_generation": 33.6148, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.24, + "eval_f1_for_task121_atomic_question_rewriting": 5.0515, + "eval_f1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.419, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8177, + "eval_f1_for_task1356_xlsum_title_generation": 19.4249, + "eval_f1_for_task1358_xlsum_title_generation": 32.3048, + "eval_f1_for_task1385_anli_textual_entailment": 8.0, + "eval_f1_for_task1386_anli_textual_entailment": 11.0, + "eval_f1_for_task1387_anli_textual_entailment": 16.0, + "eval_f1_for_task1388_cb_textual_entailment": 3.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 68.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 75.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 49.0, + "eval_f1_for_task1407_dart_data_to_text": 27.7832, + "eval_f1_for_task1409_dart_data_to_text": 44.0857, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.0726, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 59.0, + "eval_f1_for_task1516_imppres_textual_entailment": 41.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 39.5898, + "eval_f1_for_task1540_peer_read_title_generation": 33.4111, + "eval_f1_for_task1554_scitail_textual_entailment": 64.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.4412, + "eval_f1_for_task1562_zest_question_rewriting": 36.0612, + "eval_f1_for_task1586_scifact_title_generation": 35.108, + "eval_f1_for_task1598_nyc_data_to_text": 8.7136, + "eval_f1_for_task1612_sick_textual_entailment": 35.0, + "eval_f1_for_task1615_sick_textual_entailment": 62.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 73.5744, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 33.4925, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_f1_for_task1659_billsum_title_generation": 47.5945, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 18.0817, + "eval_f1_for_task1728_web_nlg_data_to_text": 52.0077, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 37.0, + "eval_f1_for_task200_multinli_textual_entailment": 80.0, + "eval_f1_for_task201_multinli_textual_entailment": 16.0, + "eval_f1_for_task202_multinli_textual_entailment": 63.0, + "eval_f1_for_task219_rocstories_title_generation": 15.4032, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_f1_for_task232_iirc_answerability_classification": 43.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 62.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 70.6381, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 13.4499, + "eval_f1_for_task288_gigaword_title_generation": 27.2057, + "eval_f1_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 2.6667, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 62.1111, + "eval_f1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 38.6618, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 21.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 73.1337, + "eval_f1_for_task418_persent_title_generation": 25.2629, + "eval_f1_for_task442_com_qa_question_rewriting": 70.8042, + "eval_f1_for_task500_scruples_title_generation": 16.469, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 35.088, + "eval_f1_for_task520_aquamuse_answerability_classification": 87.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 32.9433, + "eval_f1_for_task602_wikitext_title_generation": 15.8022, + "eval_f1_for_task613_liar_keyword_tagging": 15.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 8.9055, + "eval_f1_for_task619_ohsumed_title_generation": 45.9289, + "eval_f1_for_task620_ohsumed_keyword_tagging": 32.8198, + "eval_f1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_f1_for_task640_e_snli_textual_entailment": 16.0, + "eval_f1_for_task641_e_snli_textual_entailment": 35.0, + "eval_f1_for_task642_e_snli_textual_entailment": 29.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 71.0556, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 65.0333, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.0031, + "eval_f1_for_task671_ambigqa_question_rewriting": 55.1518, + "eval_f1_for_task677_ollie_data_to_text": 31.4086, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 28.1846, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.9406, + "eval_f1_for_task769_qed_title_generation": 71.3839, + "eval_f1_for_task827_copa_cause_effect_classification": 83.0, + "eval_f1_for_task828_copa_cause_effect_classification": 66.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 5.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_f1_for_task890_gwsd_textual_entailment": 2.0, + "eval_f1_for_task891_gap_coreference_resolution": 54.4857, + "eval_f1_for_task892_gap_coreference_resolution": 24.0, + "eval_f1_for_task893_gap_coreference_resolution": 40.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task957_e2e_data_to_text": 50.4326, + "eval_f1_for_task970_sherliic_textual_entailment": 77.0, + "eval_f1_for_textual_entailment": 36.5833, + "eval_f1_for_title_generation": 34.9787, + "eval_f1_for_word_analogy": 23.0, + "eval_gen_len": 8.063, + "eval_global_step": 3000, + "eval_loss": 1.535031795501709, + "eval_rouge1": 45.264, + "eval_rouge1_for_answerability_classification": 56.7436, + "eval_rouge1_for_cause_effect_classification": 64.6357, + "eval_rouge1_for_coreference_resolution": 45.4312, + "eval_rouge1_for_data_to_text": 38.8527, + "eval_rouge1_for_dialogue_act_recognition": 40.6155, + "eval_rouge1_for_grammar_error_correction": 67.1722, + "eval_rouge1_for_keyword_tagging": 54.9922, + "eval_rouge1_for_overlap_extraction": 9.4721, + "eval_rouge1_for_question_rewriting": 50.6195, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 54.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 9.0312, + "eval_rouge1_for_task035_winogrande_question_rewriting": 25.9754, + "eval_rouge1_for_task036_qasc_keyword_tagging": 58.6167, + "eval_rouge1_for_task039_qasc_overlap_extraction": 4.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 73.0, + "eval_rouge1_for_task102_commongen_data_to_text": 58.3395, + "eval_rouge1_for_task1152_bard_word_analogy": 12.6667, + "eval_rouge1_for_task1153_bard_word_analogy": 1.0, + "eval_rouge1_for_task1154_bard_word_analogy": 7.0, + "eval_rouge1_for_task1155_bard_word_analogy": 62.0, + "eval_rouge1_for_task1156_bard_word_analogy": 34.0, + "eval_rouge1_for_task1157_bard_word_analogy": 36.0, + "eval_rouge1_for_task1158_bard_word_analogy": 22.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.5096, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.5323, + "eval_rouge1_for_task121_atomic_question_rewriting": 7.9065, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2573, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.9903, + "eval_rouge1_for_task1356_xlsum_title_generation": 22.0157, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.9296, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 68.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 75.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 55.6, + "eval_rouge1_for_task1407_dart_data_to_text": 29.7277, + "eval_rouge1_for_task1409_dart_data_to_text": 45.0199, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.9188, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 41.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 39.5898, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.0625, + "eval_rouge1_for_task1554_scitail_textual_entailment": 64.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4256, + "eval_rouge1_for_task1562_zest_question_rewriting": 38.2257, + "eval_rouge1_for_task1586_scifact_title_generation": 40.0995, + "eval_rouge1_for_task1598_nyc_data_to_text": 8.6285, + "eval_rouge1_for_task1612_sick_textual_entailment": 35.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 87.1667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 74.5365, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 34.8249, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rouge1_for_task1659_billsum_title_generation": 49.2808, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 18.0817, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 54.6228, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 80.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 16.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 63.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.2959, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 62.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 71.4833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 14.2775, + "eval_rouge1_for_task288_gigaword_title_generation": 30.4638, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 62.0667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 39.6864, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 24.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.133, + "eval_rouge1_for_task418_persent_title_generation": 29.2176, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.0882, + "eval_rouge1_for_task500_scruples_title_generation": 19.0363, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 34.9449, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 87.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 35.0625, + "eval_rouge1_for_task602_wikitext_title_generation": 16.7452, + "eval_rouge1_for_task613_liar_keyword_tagging": 20.3524, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 20.4298, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.0655, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 38.5175, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 43.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 71.4746, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 65.1667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 71.9961, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 56.3997, + "eval_rouge1_for_task677_ollie_data_to_text": 36.0022, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 86.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.2714, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.1743, + "eval_rouge1_for_task769_qed_title_generation": 70.7468, + "eval_rouge1_for_task827_copa_cause_effect_classification": 83.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.9524, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 2.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.5714, + "eval_rouge1_for_task892_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 40.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.4129, + "eval_rouge1_for_task970_sherliic_textual_entailment": 77.0, + "eval_rouge1_for_textual_entailment": 46.7986, + "eval_rouge1_for_title_generation": 37.4062, + "eval_rouge1_for_word_analogy": 23.0, + "eval_rougeL": 44.0262, + "eval_rougeL_for_answerability_classification": 56.7436, + "eval_rougeL_for_cause_effect_classification": 64.2677, + "eval_rougeL_for_coreference_resolution": 45.4312, + "eval_rougeL_for_data_to_text": 33.0534, + "eval_rougeL_for_dialogue_act_recognition": 40.6155, + "eval_rougeL_for_grammar_error_correction": 65.336, + "eval_rougeL_for_keyword_tagging": 54.676, + "eval_rougeL_for_overlap_extraction": 9.0591, + "eval_rougeL_for_question_rewriting": 46.8753, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 54.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 8.4724, + "eval_rougeL_for_task035_winogrande_question_rewriting": 23.1453, + "eval_rougeL_for_task036_qasc_keyword_tagging": 58.0833, + "eval_rougeL_for_task039_qasc_overlap_extraction": 4.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 73.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.3063, + "eval_rougeL_for_task1152_bard_word_analogy": 12.6667, + "eval_rougeL_for_task1153_bard_word_analogy": 1.0, + "eval_rougeL_for_task1154_bard_word_analogy": 7.0, + "eval_rougeL_for_task1155_bard_word_analogy": 62.0, + "eval_rougeL_for_task1156_bard_word_analogy": 34.0, + "eval_rougeL_for_task1157_bard_word_analogy": 36.0, + "eval_rougeL_for_task1158_bard_word_analogy": 22.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 32.4806, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.2588, + "eval_rougeL_for_task121_atomic_question_rewriting": 7.7527, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.9011, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.9797, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.5825, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.3973, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 68.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 75.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 55.6, + "eval_rougeL_for_task1407_dart_data_to_text": 24.0506, + "eval_rougeL_for_task1409_dart_data_to_text": 38.5657, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.1522, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 41.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 39.5898, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.9144, + "eval_rougeL_for_task1554_scitail_textual_entailment": 64.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.5199, + "eval_rougeL_for_task1562_zest_question_rewriting": 30.557, + "eval_rougeL_for_task1586_scifact_title_generation": 34.1334, + "eval_rougeL_for_task1598_nyc_data_to_text": 7.7917, + "eval_rougeL_for_task1612_sick_textual_entailment": 35.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 87.1667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 70.547, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 32.1015, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, + "eval_rougeL_for_task1659_billsum_title_generation": 46.0715, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 18.0817, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.4279, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 80.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 16.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 63.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.2959, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 62.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 71.4833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 13.4515, + "eval_rougeL_for_task288_gigaword_title_generation": 26.5588, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 81.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 62.0667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 56.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 79.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 38.5975, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 24.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.9512, + "eval_rougeL_for_task418_persent_title_generation": 26.0497, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.6758, + "eval_rougeL_for_task500_scruples_title_generation": 17.0985, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 34.9449, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 87.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 32.5681, + "eval_rougeL_for_task602_wikitext_title_generation": 16.6106, + "eval_rougeL_for_task613_liar_keyword_tagging": 20.3524, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 18.9431, + "eval_rougeL_for_task619_ohsumed_title_generation": 42.1863, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.4698, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 43.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 71.4746, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 65.1667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.5445, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 54.7443, + "eval_rougeL_for_task677_ollie_data_to_text": 29.5155, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 86.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.7669, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.4818, + "eval_rougeL_for_task769_qed_title_generation": 70.7468, + "eval_rougeL_for_task827_copa_cause_effect_classification": 83.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.9524, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 2.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.5714, + "eval_rougeL_for_task892_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 40.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.0967, + "eval_rougeL_for_task970_sherliic_textual_entailment": 77.0, + "eval_rougeL_for_textual_entailment": 46.7986, + "eval_rougeL_for_title_generation": 34.6216, + "eval_rougeL_for_word_analogy": 23.0, + "eval_runtime": 850.2026, + "eval_samples_per_second": 14.008, + "eval_steps_per_second": 0.876, + "step": 3000 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 1.0175, + "step": 3500 + }, + { + "epoch": 0.76, + "eval_exact_match": 27.3971, + "eval_exact_match_for_answerability_classification": 45.3846, + "eval_exact_match_for_cause_effect_classification": 46.0, + "eval_exact_match_for_coreference_resolution": 42.2857, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 37.7143, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 34.6, + "eval_exact_match_for_overlap_extraction": 1.0, + "eval_exact_match_for_question_rewriting": 2.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 49.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 30.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 2.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 68.0, + "eval_exact_match_for_task102_commongen_data_to_text": 1.0, + "eval_exact_match_for_task1152_bard_word_analogy": 3.0, + "eval_exact_match_for_task1153_bard_word_analogy": 4.0, + "eval_exact_match_for_task1154_bard_word_analogy": 6.0, + "eval_exact_match_for_task1155_bard_word_analogy": 76.0, + "eval_exact_match_for_task1156_bard_word_analogy": 26.0, + "eval_exact_match_for_task1157_bard_word_analogy": 32.0, + "eval_exact_match_for_task1158_bard_word_analogy": 19.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 10.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 47.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 18.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 20.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 25.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 6.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 75.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 69.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 37.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 65.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 15.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 38.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 23.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 13.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 64.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 99.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 20.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 30.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 54.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 63.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 48.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 67.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 4.0, + "eval_exact_match_for_task602_wikitext_title_generation": 8.3333, + "eval_exact_match_for_task613_liar_keyword_tagging": 11.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 2.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 62.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 46.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 53.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 71.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 61.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 18.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 25.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 63.0, + "eval_exact_match_for_textual_entailment": 37.1667, + "eval_exact_match_for_title_generation": 11.7152, + "eval_exact_match_for_word_analogy": 21.375, + "eval_f1": 43.2661, + "eval_f1_for_answerability_classification": 48.0513, + "eval_f1_for_cause_effect_classification": 59.8849, + "eval_f1_for_coreference_resolution": 47.9127, + "eval_f1_for_data_to_text": 37.517, + "eval_f1_for_dialogue_act_recognition": 41.1429, + "eval_f1_for_grammar_error_correction": 64.5923, + "eval_f1_for_keyword_tagging": 49.7229, + "eval_f1_for_overlap_extraction": 9.2939, + "eval_f1_for_question_rewriting": 64.835, + "eval_f1_for_task020_mctaco_answerability_classification": 49.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task034_winogrande_question_rewriting": 33.9049, + "eval_f1_for_task035_winogrande_question_rewriting": 86.009, + "eval_f1_for_task036_qasc_keyword_tagging": 64.1024, + "eval_f1_for_task039_qasc_overlap_extraction": 5.0, + "eval_f1_for_task050_multirc_answerability_classification": 68.0, + "eval_f1_for_task102_commongen_data_to_text": 51.2818, + "eval_f1_for_task1152_bard_word_analogy": 3.0, + "eval_f1_for_task1153_bard_word_analogy": 4.0, + "eval_f1_for_task1154_bard_word_analogy": 8.0, + "eval_f1_for_task1155_bard_word_analogy": 76.0, + "eval_f1_for_task1156_bard_word_analogy": 26.0, + "eval_f1_for_task1157_bard_word_analogy": 32.0, + "eval_f1_for_task1158_bard_word_analogy": 19.0, + "eval_f1_for_task1159_bard_word_analogy": 10.3333, + "eval_f1_for_task1161_coda_19_title_generation": 37.3655, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.3855, + "eval_f1_for_task121_atomic_question_rewriting": 51.6805, + "eval_f1_for_task133_winowhy_coreference_resolution": 47.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.8374, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.7323, + "eval_f1_for_task1356_xlsum_title_generation": 21.8965, + "eval_f1_for_task1358_xlsum_title_generation": 34.076, + "eval_f1_for_task1385_anli_textual_entailment": 18.0, + "eval_f1_for_task1386_anli_textual_entailment": 20.0, + "eval_f1_for_task1387_anli_textual_entailment": 25.0, + "eval_f1_for_task1388_cb_textual_entailment": 6.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 75.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 69.0, + "eval_f1_for_task1407_dart_data_to_text": 27.0848, + "eval_f1_for_task1409_dart_data_to_text": 49.241, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.1248, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 37.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 33.2663, + "eval_f1_for_task1554_scitail_textual_entailment": 58.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 85.0597, + "eval_f1_for_task1562_zest_question_rewriting": 57.6755, + "eval_f1_for_task1586_scifact_title_generation": 36.8991, + "eval_f1_for_task1598_nyc_data_to_text": 11.1437, + "eval_f1_for_task1612_sick_textual_entailment": 39.0, + "eval_f1_for_task1615_sick_textual_entailment": 65.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.1489, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 38.3893, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 52.9698, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 30.6, + "eval_f1_for_task1728_web_nlg_data_to_text": 49.1176, + "eval_f1_for_task190_snli_textual_entailment": 23.0, + "eval_f1_for_task199_multinli_textual_entailment": 37.0, + "eval_f1_for_task200_multinli_textual_entailment": 84.0, + "eval_f1_for_task201_multinli_textual_entailment": 13.0, + "eval_f1_for_task202_multinli_textual_entailment": 64.0, + "eval_f1_for_task219_rocstories_title_generation": 23.0717, + "eval_f1_for_task220_rocstories_title_generation": 99.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_f1_for_task232_iirc_answerability_classification": 20.0, + "eval_f1_for_task233_iirc_answerability_classification": 30.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 54.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 72.6333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 13.5877, + "eval_f1_for_task288_gigaword_title_generation": 27.8715, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.3333, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 68.6381, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.6902, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 4.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 77.5793, + "eval_f1_for_task418_persent_title_generation": 25.4653, + "eval_f1_for_task442_com_qa_question_rewriting": 72.1064, + "eval_f1_for_task500_scruples_title_generation": 15.7934, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.1232, + "eval_f1_for_task520_aquamuse_answerability_classification": 67.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 36.5743, + "eval_f1_for_task602_wikitext_title_generation": 15.0416, + "eval_f1_for_task613_liar_keyword_tagging": 18.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 4.8374, + "eval_f1_for_task619_ohsumed_title_generation": 46.0581, + "eval_f1_for_task620_ohsumed_keyword_tagging": 40.069, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 2.0, + "eval_f1_for_task641_e_snli_textual_entailment": 37.0, + "eval_f1_for_task642_e_snli_textual_entailment": 33.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 76.1095, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 69.2619, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.9285, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.0341, + "eval_f1_for_task677_ollie_data_to_text": 34.2799, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 29.9602, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.135, + "eval_f1_for_task769_qed_title_generation": 75.6943, + "eval_f1_for_task827_copa_cause_effect_classification": 71.0, + "eval_f1_for_task828_copa_cause_effect_classification": 61.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 18.0, + "eval_f1_for_task890_gwsd_textual_entailment": 25.0, + "eval_f1_for_task891_gap_coreference_resolution": 59.6444, + "eval_f1_for_task892_gap_coreference_resolution": 48.0, + "eval_f1_for_task893_gap_coreference_resolution": 58.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task957_e2e_data_to_text": 47.4976, + "eval_f1_for_task970_sherliic_textual_entailment": 63.0, + "eval_f1_for_textual_entailment": 37.1667, + "eval_f1_for_title_generation": 37.0268, + "eval_f1_for_word_analogy": 22.2917, + "eval_gen_len": 7.731, + "eval_global_step": 3500, + "eval_loss": 1.502916932106018, + "eval_rouge1": 46.1767, + "eval_rouge1_for_answerability_classification": 48.0513, + "eval_rouge1_for_cause_effect_classification": 61.8624, + "eval_rouge1_for_coreference_resolution": 48.0139, + "eval_rouge1_for_data_to_text": 39.6974, + "eval_rouge1_for_dialogue_act_recognition": 44.0361, + "eval_rouge1_for_grammar_error_correction": 67.1273, + "eval_rouge1_for_keyword_tagging": 54.1795, + "eval_rouge1_for_overlap_extraction": 11.5218, + "eval_rouge1_for_question_rewriting": 66.3945, + "eval_rouge1_for_task020_mctaco_answerability_classification": 49.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 33.8705, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.9263, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.3548, + "eval_rouge1_for_task039_qasc_overlap_extraction": 8.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 68.0, + "eval_rouge1_for_task102_commongen_data_to_text": 57.2572, + "eval_rouge1_for_task1152_bard_word_analogy": 3.0, + "eval_rouge1_for_task1153_bard_word_analogy": 4.0, + "eval_rouge1_for_task1154_bard_word_analogy": 8.0, + "eval_rouge1_for_task1155_bard_word_analogy": 76.0, + "eval_rouge1_for_task1156_bard_word_analogy": 26.0, + "eval_rouge1_for_task1157_bard_word_analogy": 32.0, + "eval_rouge1_for_task1158_bard_word_analogy": 19.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.3333, + "eval_rouge1_for_task1161_coda_19_title_generation": 40.8993, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 81.8016, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.4149, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 47.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.2437, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.9078, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.4165, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.1794, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 42.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 33.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 75.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 71.5857, + "eval_rouge1_for_task1407_dart_data_to_text": 28.6759, + "eval_rouge1_for_task1409_dart_data_to_text": 50.1282, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 46.3726, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 37.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.9333, + "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.8819, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.691, + "eval_rouge1_for_task1586_scifact_title_generation": 40.6718, + "eval_rouge1_for_task1598_nyc_data_to_text": 11.0068, + "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 88.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.9058, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 38.9337, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 54.644, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 30.6, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 52.5338, + "eval_rouge1_for_task190_snli_textual_entailment": 23.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 13.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 64.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.1734, + "eval_rouge1_for_task220_rocstories_title_generation": 99.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 20.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 30.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 54.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 72.8, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 14.5435, + "eval_rouge1_for_task288_gigaword_title_generation": 31.785, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.1667, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 68.5667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 37.077, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.7913, + "eval_rouge1_for_task418_persent_title_generation": 29.9793, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.5952, + "eval_rouge1_for_task500_scruples_title_generation": 17.3825, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.9829, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 67.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.0768, + "eval_rouge1_for_task602_wikitext_title_generation": 16.361, + "eval_rouge1_for_task613_liar_keyword_tagging": 25.0833, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 17.2932, + "eval_rouge1_for_task619_ohsumed_title_generation": 49.292, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.35, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 77.1095, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 69.8952, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.2325, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.2025, + "eval_rouge1_for_task677_ollie_data_to_text": 37.8941, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 83.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.2053, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.2264, + "eval_rouge1_for_task769_qed_title_generation": 75.7884, + "eval_rouge1_for_task827_copa_cause_effect_classification": 71.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 61.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 25.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 59.5, + "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 58.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task957_e2e_data_to_text": 49.592, + "eval_rouge1_for_task970_sherliic_textual_entailment": 63.0, + "eval_rouge1_for_textual_entailment": 45.5972, + "eval_rouge1_for_title_generation": 39.372, + "eval_rouge1_for_word_analogy": 22.2917, + "eval_rougeL": 44.7559, + "eval_rougeL_for_answerability_classification": 48.0513, + "eval_rougeL_for_cause_effect_classification": 61.3954, + "eval_rougeL_for_coreference_resolution": 48.0139, + "eval_rougeL_for_data_to_text": 33.7564, + "eval_rougeL_for_dialogue_act_recognition": 44.0361, + "eval_rougeL_for_grammar_error_correction": 65.5593, + "eval_rougeL_for_keyword_tagging": 53.6052, + "eval_rougeL_for_overlap_extraction": 11.5218, + "eval_rougeL_for_question_rewriting": 61.1973, + "eval_rougeL_for_task020_mctaco_answerability_classification": 49.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 32.4315, + "eval_rougeL_for_task035_winogrande_question_rewriting": 76.2554, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.4833, + "eval_rougeL_for_task039_qasc_overlap_extraction": 8.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 68.0, + "eval_rougeL_for_task102_commongen_data_to_text": 51.8187, + "eval_rougeL_for_task1152_bard_word_analogy": 3.0, + "eval_rougeL_for_task1153_bard_word_analogy": 4.0, + "eval_rougeL_for_task1154_bard_word_analogy": 8.0, + "eval_rougeL_for_task1155_bard_word_analogy": 76.0, + "eval_rougeL_for_task1156_bard_word_analogy": 26.0, + "eval_rougeL_for_task1157_bard_word_analogy": 32.0, + "eval_rougeL_for_task1158_bard_word_analogy": 19.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.3333, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.1309, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.5811, + "eval_rougeL_for_task121_atomic_question_rewriting": 49.4697, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 47.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.7822, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8999, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.3777, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.8534, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 42.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 33.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 55.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 75.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 71.5857, + "eval_rougeL_for_task1407_dart_data_to_text": 21.9961, + "eval_rougeL_for_task1409_dart_data_to_text": 43.0063, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.1288, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 37.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 55.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.6058, + "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9899, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.5167, + "eval_rougeL_for_task1586_scifact_title_generation": 34.7215, + "eval_rougeL_for_task1598_nyc_data_to_text": 10.2757, + "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 88.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 74.8804, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 36.5039, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 50.9842, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 30.6, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 44.9311, + "eval_rougeL_for_task190_snli_textual_entailment": 23.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 13.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 64.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.1734, + "eval_rougeL_for_task220_rocstories_title_generation": 99.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 47.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 20.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 30.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 54.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 72.8, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 14.5435, + "eval_rougeL_for_task288_gigaword_title_generation": 27.3793, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.1667, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 68.5667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 35.1469, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 5.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.4265, + "eval_rougeL_for_task418_persent_title_generation": 26.9288, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3519, + "eval_rougeL_for_task500_scruples_title_generation": 16.9243, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.9829, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 67.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 35.9483, + "eval_rougeL_for_task602_wikitext_title_generation": 16.2419, + "eval_rougeL_for_task613_liar_keyword_tagging": 25.0833, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 15.9545, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.1531, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.35, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 77.1095, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 69.8952, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.7367, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.6207, + "eval_rougeL_for_task677_ollie_data_to_text": 31.4482, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 83.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.9766, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.311, + "eval_rougeL_for_task769_qed_title_generation": 75.1217, + "eval_rougeL_for_task827_copa_cause_effect_classification": 71.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 61.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 6.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 32.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 25.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 59.5, + "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 58.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.2067, + "eval_rougeL_for_task970_sherliic_textual_entailment": 63.0, + "eval_rougeL_for_textual_entailment": 45.5972, + "eval_rougeL_for_title_generation": 36.3614, + "eval_rougeL_for_word_analogy": 22.2917, + "eval_runtime": 728.319, + "eval_samples_per_second": 16.353, + "eval_steps_per_second": 1.023, + "step": 3500 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.9719, + "step": 4000 + }, + { + "epoch": 0.87, + "eval_exact_match": 26.8346, + "eval_exact_match_for_answerability_classification": 44.4615, + "eval_exact_match_for_cause_effect_classification": 45.4286, + "eval_exact_match_for_coreference_resolution": 37.8571, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 39.8571, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 33.4, + "eval_exact_match_for_overlap_extraction": 1.0, + "eval_exact_match_for_question_rewriting": 2.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 29.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 2.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 1.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 10.0, + "eval_exact_match_for_task1155_bard_word_analogy": 56.0, + "eval_exact_match_for_task1156_bard_word_analogy": 20.0, + "eval_exact_match_for_task1157_bard_word_analogy": 43.0, + "eval_exact_match_for_task1158_bard_word_analogy": 16.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 69.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 19.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 25.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 10.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 76.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 8.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 66.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 77.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 40.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 63.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 13.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1659_billsum_title_generation": 35.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 4.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 18.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 43.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 83.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 11.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 7.1429, + "eval_exact_match_for_task613_liar_keyword_tagging": 10.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 26.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 53.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 81.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 54.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 41.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 66.0, + "eval_exact_match_for_textual_entailment": 37.0833, + "eval_exact_match_for_title_generation": 11.7152, + "eval_exact_match_for_word_analogy": 22.625, + "eval_f1": 42.5274, + "eval_f1_for_answerability_classification": 47.0769, + "eval_f1_for_cause_effect_classification": 60.087, + "eval_f1_for_coreference_resolution": 43.4052, + "eval_f1_for_data_to_text": 37.2965, + "eval_f1_for_dialogue_act_recognition": 43.7857, + "eval_f1_for_grammar_error_correction": 66.114, + "eval_f1_for_keyword_tagging": 50.1587, + "eval_f1_for_overlap_extraction": 8.1596, + "eval_f1_for_question_rewriting": 61.2455, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 54.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 33.3476, + "eval_f1_for_task035_winogrande_question_rewriting": 86.7058, + "eval_f1_for_task036_qasc_keyword_tagging": 62.459, + "eval_f1_for_task039_qasc_overlap_extraction": 2.6667, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 48.4604, + "eval_f1_for_task1152_bard_word_analogy": 11.0, + "eval_f1_for_task1153_bard_word_analogy": 11.0, + "eval_f1_for_task1154_bard_word_analogy": 10.0, + "eval_f1_for_task1155_bard_word_analogy": 56.0, + "eval_f1_for_task1156_bard_word_analogy": 23.8333, + "eval_f1_for_task1157_bard_word_analogy": 43.0, + "eval_f1_for_task1158_bard_word_analogy": 16.6667, + "eval_f1_for_task1159_bard_word_analogy": 16.6667, + "eval_f1_for_task1161_coda_19_title_generation": 37.4228, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0632, + "eval_f1_for_task121_atomic_question_rewriting": 51.5399, + "eval_f1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.1505, + "eval_f1_for_task1344_rte_textual_entailment": 69.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.7736, + "eval_f1_for_task1356_xlsum_title_generation": 23.0236, + "eval_f1_for_task1358_xlsum_title_generation": 32.1369, + "eval_f1_for_task1385_anli_textual_entailment": 19.0, + "eval_f1_for_task1386_anli_textual_entailment": 25.0, + "eval_f1_for_task1387_anli_textual_entailment": 26.0, + "eval_f1_for_task1388_cb_textual_entailment": 10.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 76.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, + "eval_f1_for_task1407_dart_data_to_text": 25.1491, + "eval_f1_for_task1409_dart_data_to_text": 43.9175, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 47.9718, + "eval_f1_for_task1439_doqa_answerability_classification": 54.0, + "eval_f1_for_task1442_doqa_answerability_classification": 59.0, + "eval_f1_for_task1516_imppres_textual_entailment": 8.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 66.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 53.0, + "eval_f1_for_task1540_peer_read_title_generation": 32.1386, + "eval_f1_for_task1554_scitail_textual_entailment": 77.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.2562, + "eval_f1_for_task1562_zest_question_rewriting": 57.9351, + "eval_f1_for_task1586_scifact_title_generation": 34.6569, + "eval_f1_for_task1598_nyc_data_to_text": 5.5384, + "eval_f1_for_task1612_sick_textual_entailment": 40.0, + "eval_f1_for_task1615_sick_textual_entailment": 63.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5228, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 58.6414, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_f1_for_task1659_billsum_title_generation": 50.4202, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 35.7714, + "eval_f1_for_task1728_web_nlg_data_to_text": 45.1525, + "eval_f1_for_task190_snli_textual_entailment": 4.0, + "eval_f1_for_task199_multinli_textual_entailment": 36.0, + "eval_f1_for_task200_multinli_textual_entailment": 92.0, + "eval_f1_for_task201_multinli_textual_entailment": 18.0, + "eval_f1_for_task202_multinli_textual_entailment": 43.0, + "eval_f1_for_task219_rocstories_title_generation": 19.6465, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_f1_for_task232_iirc_answerability_classification": 0.0, + "eval_f1_for_task233_iirc_answerability_classification": 0.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 83.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 63.7048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 13.6525, + "eval_f1_for_task288_gigaword_title_generation": 28.2722, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_f1_for_task329_gap_coreference_resolution": 51.0, + "eval_f1_for_task330_gap_coreference_resolution": 64.1349, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.5262, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 11.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 80.0651, + "eval_f1_for_task418_persent_title_generation": 26.9415, + "eval_f1_for_task442_com_qa_question_rewriting": 63.9695, + "eval_f1_for_task500_scruples_title_generation": 15.4269, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.5875, + "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.2894, + "eval_f1_for_task602_wikitext_title_generation": 16.9616, + "eval_f1_for_task613_liar_keyword_tagging": 14.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 5.0825, + "eval_f1_for_task619_ohsumed_title_generation": 45.4559, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.2346, + "eval_f1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_f1_for_task640_e_snli_textual_entailment": 26.0, + "eval_f1_for_task641_e_snli_textual_entailment": 31.0, + "eval_f1_for_task642_e_snli_textual_entailment": 38.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 74.1, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 60.9094, + "eval_f1_for_task670_ambigqa_question_rewriting": 64.1811, + "eval_f1_for_task671_ambigqa_question_rewriting": 36.5972, + "eval_f1_for_task677_ollie_data_to_text": 29.7584, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 29.5509, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.9009, + "eval_f1_for_task769_qed_title_generation": 79.5918, + "eval_f1_for_task827_copa_cause_effect_classification": 81.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_f1_for_task890_gwsd_textual_entailment": 54.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.819, + "eval_f1_for_task892_gap_coreference_resolution": 42.0, + "eval_f1_for_task893_gap_coreference_resolution": 50.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 41.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_f1_for_task957_e2e_data_to_text": 49.9176, + "eval_f1_for_task970_sherliic_textual_entailment": 66.0, + "eval_f1_for_textual_entailment": 37.0833, + "eval_f1_for_title_generation": 36.9372, + "eval_f1_for_word_analogy": 23.5208, + "eval_gen_len": 8.0645, + "eval_global_step": 4000, + "eval_loss": 1.5200403928756714, + "eval_rouge1": 45.3168, + "eval_rouge1_for_answerability_classification": 47.0769, + "eval_rouge1_for_cause_effect_classification": 62.0908, + "eval_rouge1_for_coreference_resolution": 43.6764, + "eval_rouge1_for_data_to_text": 39.5076, + "eval_rouge1_for_dialogue_act_recognition": 46.2503, + "eval_rouge1_for_grammar_error_correction": 70.2424, + "eval_rouge1_for_keyword_tagging": 54.4371, + "eval_rouge1_for_overlap_extraction": 8.8478, + "eval_rouge1_for_question_rewriting": 62.8016, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 54.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 33.3238, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.5926, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.1923, + "eval_rouge1_for_task039_qasc_overlap_extraction": 3.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 54.7842, + "eval_rouge1_for_task1152_bard_word_analogy": 11.0, + "eval_rouge1_for_task1153_bard_word_analogy": 12.0, + "eval_rouge1_for_task1154_bard_word_analogy": 10.0, + "eval_rouge1_for_task1155_bard_word_analogy": 56.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.1667, + "eval_rouge1_for_task1157_bard_word_analogy": 43.0, + "eval_rouge1_for_task1158_bard_word_analogy": 16.6667, + "eval_rouge1_for_task1159_bard_word_analogy": 16.6667, + "eval_rouge1_for_task1161_coda_19_title_generation": 41.3079, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.3098, + "eval_rouge1_for_task121_atomic_question_rewriting": 53.746, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.4779, + "eval_rouge1_for_task1344_rte_textual_entailment": 69.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.8628, + "eval_rouge1_for_task1356_xlsum_title_generation": 27.7589, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.2959, + "eval_rouge1_for_task1385_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 43.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 38.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 76.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 70.5857, + "eval_rouge1_for_task1407_dart_data_to_text": 26.8333, + "eval_rouge1_for_task1409_dart_data_to_text": 44.8879, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.3227, + "eval_rouge1_for_task1439_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 8.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 66.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 36.1871, + "eval_rouge1_for_task1554_scitail_textual_entailment": 77.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.1622, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.8507, + "eval_rouge1_for_task1586_scifact_title_generation": 39.2459, + "eval_rouge1_for_task1598_nyc_data_to_text": 5.494, + "eval_rouge1_for_task1612_sick_textual_entailment": 40.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 87.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.097, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 58.7942, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1659_billsum_title_generation": 52.4069, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 35.7714, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 48.1685, + "eval_rouge1_for_task190_snli_textual_entailment": 4.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 18.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 43.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.7308, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 83.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.55, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 14.0288, + "eval_rouge1_for_task288_gigaword_title_generation": 32.1291, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 64.0905, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 36.8184, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 14.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.3123, + "eval_rouge1_for_task418_persent_title_generation": 30.5612, + "eval_rouge1_for_task442_com_qa_question_rewriting": 67.697, + "eval_rouge1_for_task500_scruples_title_generation": 17.7731, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.5748, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.9396, + "eval_rouge1_for_task602_wikitext_title_generation": 17.9972, + "eval_rouge1_for_task613_liar_keyword_tagging": 19.5419, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 17.817, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.4227, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 46.3846, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 77.0667, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 60.9051, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 64.9399, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 37.0859, + "eval_rouge1_for_task677_ollie_data_to_text": 34.1887, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.2088, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4308, + "eval_rouge1_for_task769_qed_title_generation": 79.5498, + "eval_rouge1_for_task827_copa_cause_effect_classification": 81.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 54.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.819, + "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 41.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.5101, + "eval_rouge1_for_task970_sherliic_textual_entailment": 66.0, + "eval_rouge1_for_textual_entailment": 44.8194, + "eval_rouge1_for_title_generation": 39.3323, + "eval_rouge1_for_word_analogy": 23.8125, + "eval_rougeL": 43.7988, + "eval_rougeL_for_answerability_classification": 47.0769, + "eval_rougeL_for_cause_effect_classification": 61.7077, + "eval_rougeL_for_coreference_resolution": 43.6478, + "eval_rougeL_for_data_to_text": 32.5535, + "eval_rougeL_for_dialogue_act_recognition": 46.2503, + "eval_rougeL_for_grammar_error_correction": 68.5375, + "eval_rougeL_for_keyword_tagging": 53.9888, + "eval_rougeL_for_overlap_extraction": 8.7065, + "eval_rougeL_for_question_rewriting": 57.5113, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 54.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 31.5658, + "eval_rougeL_for_task035_winogrande_question_rewriting": 77.2456, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.9701, + "eval_rougeL_for_task039_qasc_overlap_extraction": 3.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 49.9793, + "eval_rougeL_for_task1152_bard_word_analogy": 11.0, + "eval_rougeL_for_task1153_bard_word_analogy": 12.0, + "eval_rougeL_for_task1154_bard_word_analogy": 10.0, + "eval_rougeL_for_task1155_bard_word_analogy": 56.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.1667, + "eval_rougeL_for_task1157_bard_word_analogy": 43.0, + "eval_rougeL_for_task1158_bard_word_analogy": 16.6667, + "eval_rougeL_for_task1159_bard_word_analogy": 16.6667, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.1378, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9047, + "eval_rougeL_for_task121_atomic_question_rewriting": 49.7742, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.2394, + "eval_rougeL_for_task1344_rte_textual_entailment": 69.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.9936, + "eval_rougeL_for_task1356_xlsum_title_generation": 23.8254, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2782, + "eval_rougeL_for_task1385_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 43.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 38.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 76.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 70.5857, + "eval_rougeL_for_task1407_dart_data_to_text": 21.4913, + "eval_rougeL_for_task1409_dart_data_to_text": 37.6464, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.8291, + "eval_rougeL_for_task1439_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 8.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 66.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 32.021, + "eval_rougeL_for_task1554_scitail_textual_entailment": 77.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.246, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.9, + "eval_rougeL_for_task1586_scifact_title_generation": 32.8618, + "eval_rougeL_for_task1598_nyc_data_to_text": 5.494, + "eval_rougeL_for_task1612_sick_textual_entailment": 40.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 87.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.8706, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 45.6509, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1659_billsum_title_generation": 48.82, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 35.3714, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 41.8819, + "eval_rougeL_for_task190_snli_textual_entailment": 4.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 18.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 43.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.8784, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 83.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.55, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 13.7464, + "eval_rougeL_for_task288_gigaword_title_generation": 27.8203, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 64.0905, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 70.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 35.6422, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 14.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.1795, + "eval_rougeL_for_task418_persent_title_generation": 26.2518, + "eval_rougeL_for_task442_com_qa_question_rewriting": 59.6234, + "eval_rougeL_for_task500_scruples_title_generation": 16.743, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.3349, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.8849, + "eval_rougeL_for_task602_wikitext_title_generation": 17.814, + "eval_rougeL_for_task613_liar_keyword_tagging": 19.5419, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 16.3115, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.1048, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.3655, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 59.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 77.0667, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 60.9051, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 63.709, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 35.8585, + "eval_rougeL_for_task677_ollie_data_to_text": 27.7754, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 78.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.2338, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.1652, + "eval_rougeL_for_task769_qed_title_generation": 79.1498, + "eval_rougeL_for_task827_copa_cause_effect_classification": 81.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 54.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.819, + "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 41.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_rougeL_for_task957_e2e_data_to_text": 37.6296, + "eval_rougeL_for_task970_sherliic_textual_entailment": 66.0, + "eval_rougeL_for_textual_entailment": 44.8194, + "eval_rougeL_for_title_generation": 36.1855, + "eval_rougeL_for_word_analogy": 23.8125, + "eval_runtime": 814.2105, + "eval_samples_per_second": 14.628, + "eval_steps_per_second": 0.915, + "step": 4000 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.9721, + "step": 4500 + }, + { + "epoch": 0.98, + "eval_exact_match": 29.9328, + "eval_exact_match_for_answerability_classification": 48.1538, + "eval_exact_match_for_cause_effect_classification": 45.4286, + "eval_exact_match_for_coreference_resolution": 42.7143, + "eval_exact_match_for_data_to_text": 0.6053, + "eval_exact_match_for_dialogue_act_recognition": 43.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 39.2, + "eval_exact_match_for_overlap_extraction": 3.0, + "eval_exact_match_for_question_rewriting": 2.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 50.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 6.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 58.0, + "eval_exact_match_for_task102_commongen_data_to_text": 1.0, + "eval_exact_match_for_task1152_bard_word_analogy": 12.0, + "eval_exact_match_for_task1153_bard_word_analogy": 7.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 63.0, + "eval_exact_match_for_task1156_bard_word_analogy": 38.0, + "eval_exact_match_for_task1157_bard_word_analogy": 53.0, + "eval_exact_match_for_task1158_bard_word_analogy": 25.0, + "eval_exact_match_for_task1159_bard_word_analogy": 23.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 48.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 19.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 77.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 2.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 56.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 63.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 72.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 60.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 13.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_exact_match_for_task1659_billsum_title_generation": 38.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 28.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 87.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 18.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 57.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 16.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 17.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 82.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 63.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 47.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 58.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 67.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 58.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 1.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 5.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 80.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 7.1429, + "eval_exact_match_for_task613_liar_keyword_tagging": 8.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 19.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 68.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 40.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 70.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 71.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 62.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 60.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 70.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 21.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 53.0, + "eval_exact_match_for_textual_entailment": 42.2083, + "eval_exact_match_for_title_generation": 12.6121, + "eval_exact_match_for_word_analogy": 29.5, + "eval_f1": 44.3464, + "eval_f1_for_answerability_classification": 50.8718, + "eval_f1_for_cause_effect_classification": 60.443, + "eval_f1_for_coreference_resolution": 48.3673, + "eval_f1_for_data_to_text": 34.4701, + "eval_f1_for_dialogue_act_recognition": 45.1429, + "eval_f1_for_grammar_error_correction": 59.7684, + "eval_f1_for_keyword_tagging": 54.3367, + "eval_f1_for_overlap_extraction": 11.584, + "eval_f1_for_question_rewriting": 54.4547, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 55.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 31.9422, + "eval_f1_for_task035_winogrande_question_rewriting": 54.6598, + "eval_f1_for_task036_qasc_keyword_tagging": 78.8, + "eval_f1_for_task039_qasc_overlap_extraction": 6.0, + "eval_f1_for_task050_multirc_answerability_classification": 58.0, + "eval_f1_for_task102_commongen_data_to_text": 49.9481, + "eval_f1_for_task1152_bard_word_analogy": 12.0, + "eval_f1_for_task1153_bard_word_analogy": 8.3333, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 63.0, + "eval_f1_for_task1156_bard_word_analogy": 39.3333, + "eval_f1_for_task1157_bard_word_analogy": 53.0, + "eval_f1_for_task1158_bard_word_analogy": 25.0, + "eval_f1_for_task1159_bard_word_analogy": 23.0, + "eval_f1_for_task1161_coda_19_title_generation": 38.14, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 68.9057, + "eval_f1_for_task121_atomic_question_rewriting": 52.4373, + "eval_f1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.5375, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 34.3241, + "eval_f1_for_task1356_xlsum_title_generation": 22.5907, + "eval_f1_for_task1358_xlsum_title_generation": 32.1023, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 19.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 56.0, + "eval_f1_for_task1407_dart_data_to_text": 21.7033, + "eval_f1_for_task1409_dart_data_to_text": 46.0304, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.3769, + "eval_f1_for_task1439_doqa_answerability_classification": 56.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 63.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.1552, + "eval_f1_for_task1554_scitail_textual_entailment": 72.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.16, + "eval_f1_for_task1562_zest_question_rewriting": 57.5955, + "eval_f1_for_task1586_scifact_title_generation": 36.1239, + "eval_f1_for_task1598_nyc_data_to_text": 3.9044, + "eval_f1_for_task1612_sick_textual_entailment": 48.0, + "eval_f1_for_task1615_sick_textual_entailment": 60.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 69.9763, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 36.2236, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_f1_for_task1659_billsum_title_generation": 53.0035, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 29.0167, + "eval_f1_for_task1728_web_nlg_data_to_text": 48.987, + "eval_f1_for_task190_snli_textual_entailment": 28.0, + "eval_f1_for_task199_multinli_textual_entailment": 40.0, + "eval_f1_for_task200_multinli_textual_entailment": 87.0, + "eval_f1_for_task201_multinli_textual_entailment": 18.0, + "eval_f1_for_task202_multinli_textual_entailment": 57.0, + "eval_f1_for_task219_rocstories_title_generation": 14.8919, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 16.0, + "eval_f1_for_task233_iirc_answerability_classification": 17.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 82.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 72.8714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 17.1681, + "eval_f1_for_task288_gigaword_title_generation": 28.9224, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_f1_for_task329_gap_coreference_resolution": 55.0, + "eval_f1_for_task330_gap_coreference_resolution": 64.2635, + "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 82.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 37.6238, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 6.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 77.2014, + "eval_f1_for_task418_persent_title_generation": 26.0927, + "eval_f1_for_task442_com_qa_question_rewriting": 66.3601, + "eval_f1_for_task500_scruples_title_generation": 17.4663, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 29.7903, + "eval_f1_for_task520_aquamuse_answerability_classification": 80.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.5648, + "eval_f1_for_task602_wikitext_title_generation": 13.6483, + "eval_f1_for_task613_liar_keyword_tagging": 16.119, + "eval_f1_for_task614_glucose_cause_effect_classification": 6.1437, + "eval_f1_for_task619_ohsumed_title_generation": 45.5207, + "eval_f1_for_task620_ohsumed_keyword_tagging": 44.969, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 19.0, + "eval_f1_for_task641_e_snli_textual_entailment": 40.0, + "eval_f1_for_task642_e_snli_textual_entailment": 48.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 81.7952, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 65.9714, + "eval_f1_for_task670_ambigqa_question_rewriting": 61.9046, + "eval_f1_for_task671_ambigqa_question_rewriting": 23.6947, + "eval_f1_for_task677_ollie_data_to_text": 29.2835, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 30.1753, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.9976, + "eval_f1_for_task769_qed_title_generation": 83.7573, + "eval_f1_for_task827_copa_cause_effect_classification": 71.0, + "eval_f1_for_task828_copa_cause_effect_classification": 62.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 60.0, + "eval_f1_for_task891_gap_coreference_resolution": 54.6857, + "eval_f1_for_task892_gap_coreference_resolution": 42.0, + "eval_f1_for_task893_gap_coreference_resolution": 70.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 21.0, + "eval_f1_for_task957_e2e_data_to_text": 47.343, + "eval_f1_for_task970_sherliic_textual_entailment": 53.0, + "eval_f1_for_textual_entailment": 42.2083, + "eval_f1_for_title_generation": 36.676, + "eval_f1_for_word_analogy": 29.8333, + "eval_gen_len": 6.8015, + "eval_global_step": 4500, + "eval_loss": 1.4875895977020264, + "eval_rouge1": 46.6566, + "eval_rouge1_for_answerability_classification": 50.8718, + "eval_rouge1_for_cause_effect_classification": 60.5542, + "eval_rouge1_for_coreference_resolution": 48.6725, + "eval_rouge1_for_data_to_text": 36.825, + "eval_rouge1_for_dialogue_act_recognition": 48.7921, + "eval_rouge1_for_grammar_error_correction": 61.8623, + "eval_rouge1_for_keyword_tagging": 58.0416, + "eval_rouge1_for_overlap_extraction": 12.4512, + "eval_rouge1_for_question_rewriting": 56.1602, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 31.9005, + "eval_rouge1_for_task035_winogrande_question_rewriting": 55.2613, + "eval_rouge1_for_task036_qasc_keyword_tagging": 81.0333, + "eval_rouge1_for_task039_qasc_overlap_extraction": 7.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 58.0, + "eval_rouge1_for_task102_commongen_data_to_text": 58.3871, + "eval_rouge1_for_task1152_bard_word_analogy": 12.0, + "eval_rouge1_for_task1153_bard_word_analogy": 8.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 63.0, + "eval_rouge1_for_task1156_bard_word_analogy": 39.3333, + "eval_rouge1_for_task1157_bard_word_analogy": 53.0, + "eval_rouge1_for_task1158_bard_word_analogy": 25.0, + "eval_rouge1_for_task1159_bard_word_analogy": 23.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 42.9727, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 69.5147, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.8769, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 12.5881, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 37.4916, + "eval_rouge1_for_task1356_xlsum_title_generation": 27.9458, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.3793, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 24.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.8778, + "eval_rouge1_for_task1407_dart_data_to_text": 22.5476, + "eval_rouge1_for_task1409_dart_data_to_text": 47.2871, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.921, + "eval_rouge1_for_task1439_doqa_answerability_classification": 56.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 63.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 39.412, + "eval_rouge1_for_task1554_scitail_textual_entailment": 72.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8035, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.4425, + "eval_rouge1_for_task1586_scifact_title_generation": 40.0332, + "eval_rouge1_for_task1598_nyc_data_to_text": 3.8554, + "eval_rouge1_for_task1612_sick_textual_entailment": 48.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 85.8333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 71.0318, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 37.4926, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rouge1_for_task1659_billsum_title_generation": 54.7543, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 28.9889, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 51.1332, + "eval_rouge1_for_task190_snli_textual_entailment": 28.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 87.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 18.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 57.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.1984, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 16.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 17.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 82.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 73.2167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.9024, + "eval_rouge1_for_task288_gigaword_title_generation": 32.766, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 64.219, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 82.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 38.6046, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 8.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.5342, + "eval_rouge1_for_task418_persent_title_generation": 30.1951, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.2596, + "eval_rouge1_for_task500_scruples_title_generation": 20.633, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 29.6301, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 80.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.5575, + "eval_rouge1_for_task602_wikitext_title_generation": 15.1711, + "eval_rouge1_for_task613_liar_keyword_tagging": 22.6129, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 5.9416, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.6796, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 53.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 83.4286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 67.7381, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 63.1917, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 24.2568, + "eval_rouge1_for_task677_ollie_data_to_text": 33.7066, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.1103, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.0855, + "eval_rouge1_for_task769_qed_title_generation": 83.7574, + "eval_rouge1_for_task827_copa_cause_effect_classification": 71.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 62.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 60.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.919, + "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 70.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 21.0, + "eval_rouge1_for_task957_e2e_data_to_text": 48.443, + "eval_rouge1_for_task970_sherliic_textual_entailment": 53.0, + "eval_rouge1_for_textual_entailment": 47.8264, + "eval_rouge1_for_title_generation": 39.3137, + "eval_rouge1_for_word_analogy": 29.8333, + "eval_rougeL": 45.2967, + "eval_rougeL_for_answerability_classification": 50.8718, + "eval_rougeL_for_cause_effect_classification": 60.4585, + "eval_rougeL_for_coreference_resolution": 48.6725, + "eval_rougeL_for_data_to_text": 31.4556, + "eval_rougeL_for_dialogue_act_recognition": 48.7921, + "eval_rougeL_for_grammar_error_correction": 59.6225, + "eval_rougeL_for_keyword_tagging": 57.8083, + "eval_rougeL_for_overlap_extraction": 12.3534, + "eval_rougeL_for_question_rewriting": 51.1895, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 30.7714, + "eval_rougeL_for_task035_winogrande_question_rewriting": 48.078, + "eval_rougeL_for_task036_qasc_keyword_tagging": 81.0333, + "eval_rougeL_for_task039_qasc_overlap_extraction": 7.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 58.0, + "eval_rougeL_for_task102_commongen_data_to_text": 51.492, + "eval_rougeL_for_task1152_bard_word_analogy": 12.0, + "eval_rougeL_for_task1153_bard_word_analogy": 8.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 63.0, + "eval_rougeL_for_task1156_bard_word_analogy": 39.3333, + "eval_rougeL_for_task1157_bard_word_analogy": 53.0, + "eval_rougeL_for_task1158_bard_word_analogy": 25.0, + "eval_rougeL_for_task1159_bard_word_analogy": 23.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 35.719, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 67.2452, + "eval_rougeL_for_task121_atomic_question_rewriting": 51.0765, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 48.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.1069, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 34.2185, + "eval_rougeL_for_task1356_xlsum_title_generation": 24.629, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2119, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 24.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 60.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.8778, + "eval_rougeL_for_task1407_dart_data_to_text": 18.2126, + "eval_rougeL_for_task1409_dart_data_to_text": 40.695, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.3472, + "eval_rougeL_for_task1439_doqa_answerability_classification": 56.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 63.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.434, + "eval_rougeL_for_task1554_scitail_textual_entailment": 72.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.8978, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.6542, + "eval_rougeL_for_task1586_scifact_title_generation": 34.2707, + "eval_rougeL_for_task1598_nyc_data_to_text": 3.8554, + "eval_rougeL_for_task1612_sick_textual_entailment": 48.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 85.8333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 67.149, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 34.692, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 59.0, + "eval_rougeL_for_task1659_billsum_title_generation": 51.1187, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 28.9889, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 43.6442, + "eval_rougeL_for_task190_snli_textual_entailment": 28.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 87.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 18.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 57.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.9126, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 16.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 17.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 82.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 73.2167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.7068, + "eval_rougeL_for_task288_gigaword_title_generation": 27.923, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 64.219, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 82.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 38.2259, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 8.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.6651, + "eval_rougeL_for_task418_persent_title_generation": 26.3759, + "eval_rougeL_for_task442_com_qa_question_rewriting": 64.8256, + "eval_rougeL_for_task500_scruples_title_generation": 19.2729, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.6301, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 80.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.1964, + "eval_rougeL_for_task602_wikitext_title_generation": 15.1711, + "eval_rougeL_for_task613_liar_keyword_tagging": 22.6129, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 5.6504, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.373, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 51.9667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 83.4286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 67.7381, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 61.2988, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 23.1019, + "eval_rougeL_for_task677_ollie_data_to_text": 27.6173, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 84.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.6993, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.4983, + "eval_rougeL_for_task769_qed_title_generation": 83.313, + "eval_rougeL_for_task827_copa_cause_effect_classification": 71.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 62.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 60.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.919, + "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 70.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 21.0, + "eval_rougeL_for_task957_e2e_data_to_text": 38.4453, + "eval_rougeL_for_task970_sherliic_textual_entailment": 53.0, + "eval_rougeL_for_textual_entailment": 47.8264, + "eval_rougeL_for_title_generation": 36.1508, + "eval_rougeL_for_word_analogy": 29.8333, + "eval_runtime": 688.328, + "eval_samples_per_second": 17.303, + "eval_steps_per_second": 1.082, + "step": 4500 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.8083, + "step": 5000 + }, + { + "epoch": 1.09, + "eval_exact_match": 28.3627, + "eval_exact_match_for_answerability_classification": 43.0, + "eval_exact_match_for_cause_effect_classification": 46.4286, + "eval_exact_match_for_coreference_resolution": 41.5714, + "eval_exact_match_for_data_to_text": 0.7264, + "eval_exact_match_for_dialogue_act_recognition": 39.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 40.2, + "eval_exact_match_for_overlap_extraction": 3.5, + "eval_exact_match_for_question_rewriting": 2.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 47.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 46.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 7.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 8.0, + "eval_exact_match_for_task1154_bard_word_analogy": 18.0, + "eval_exact_match_for_task1155_bard_word_analogy": 57.0, + "eval_exact_match_for_task1156_bard_word_analogy": 31.0, + "eval_exact_match_for_task1157_bard_word_analogy": 41.0, + "eval_exact_match_for_task1158_bard_word_analogy": 23.0, + "eval_exact_match_for_task1159_bard_word_analogy": 12.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 36.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 68.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 27.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 19.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 77.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 3.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 58.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 61.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 60.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 64.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1659_billsum_title_generation": 34.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 3.0, + "eval_exact_match_for_task190_snli_textual_entailment": 10.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 39.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 89.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 57.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 12.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 17.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 92.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 61.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 0.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 59.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 61.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 56.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 2.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 7.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, + "eval_exact_match_for_task613_liar_keyword_tagging": 9.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 66.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 2.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 44.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 59.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 34.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 76.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 64.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 10.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 80.0, + "eval_exact_match_for_textual_entailment": 40.625, + "eval_exact_match_for_title_generation": 11.6031, + "eval_exact_match_for_word_analogy": 25.125, + "eval_f1": 43.7679, + "eval_f1_for_answerability_classification": 47.0359, + "eval_f1_for_cause_effect_classification": 61.8107, + "eval_f1_for_coreference_resolution": 47.4559, + "eval_f1_for_data_to_text": 38.7582, + "eval_f1_for_dialogue_act_recognition": 41.3095, + "eval_f1_for_grammar_error_correction": 63.7626, + "eval_f1_for_keyword_tagging": 53.6114, + "eval_f1_for_overlap_extraction": 9.997, + "eval_f1_for_question_rewriting": 58.2767, + "eval_f1_for_task020_mctaco_answerability_classification": 47.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 57.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 23.3943, + "eval_f1_for_task035_winogrande_question_rewriting": 71.5835, + "eval_f1_for_task036_qasc_keyword_tagging": 73.5167, + "eval_f1_for_task039_qasc_overlap_extraction": 7.0, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 50.5806, + "eval_f1_for_task1152_bard_word_analogy": 12.3333, + "eval_f1_for_task1153_bard_word_analogy": 8.0, + "eval_f1_for_task1154_bard_word_analogy": 18.0, + "eval_f1_for_task1155_bard_word_analogy": 57.0, + "eval_f1_for_task1156_bard_word_analogy": 31.0, + "eval_f1_for_task1157_bard_word_analogy": 41.0, + "eval_f1_for_task1158_bard_word_analogy": 23.0, + "eval_f1_for_task1159_bard_word_analogy": 12.0, + "eval_f1_for_task1161_coda_19_title_generation": 36.715, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.3652, + "eval_f1_for_task121_atomic_question_rewriting": 52.1612, + "eval_f1_for_task133_winowhy_coreference_resolution": 36.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2616, + "eval_f1_for_task1344_rte_textual_entailment": 68.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.1522, + "eval_f1_for_task1356_xlsum_title_generation": 21.3146, + "eval_f1_for_task1358_xlsum_title_generation": 33.1619, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 26.0, + "eval_f1_for_task1387_anli_textual_entailment": 27.0, + "eval_f1_for_task1388_cb_textual_entailment": 19.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, + "eval_f1_for_task1407_dart_data_to_text": 24.155, + "eval_f1_for_task1409_dart_data_to_text": 53.9464, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.267, + "eval_f1_for_task1439_doqa_answerability_classification": 59.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 34.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 58.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 26.0, + "eval_f1_for_task1540_peer_read_title_generation": 35.4399, + "eval_f1_for_task1554_scitail_textual_entailment": 61.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 85.2583, + "eval_f1_for_task1562_zest_question_rewriting": 57.9641, + "eval_f1_for_task1586_scifact_title_generation": 36.251, + "eval_f1_for_task1598_nyc_data_to_text": 6.9066, + "eval_f1_for_task1612_sick_textual_entailment": 60.0, + "eval_f1_for_task1615_sick_textual_entailment": 64.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.4233, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_f1_for_task1631_open_pi_data_to_text": 49.5031, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_f1_for_task1659_billsum_title_generation": 50.2878, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 31.0333, + "eval_f1_for_task1728_web_nlg_data_to_text": 53.2406, + "eval_f1_for_task190_snli_textual_entailment": 10.0, + "eval_f1_for_task199_multinli_textual_entailment": 39.0, + "eval_f1_for_task200_multinli_textual_entailment": 89.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 57.0, + "eval_f1_for_task219_rocstories_title_generation": 16.3244, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_f1_for_task232_iirc_answerability_classification": 12.0, + "eval_f1_for_task233_iirc_answerability_classification": 17.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 92.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 69.0333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 12.994, + "eval_f1_for_task288_gigaword_title_generation": 29.6954, + "eval_f1_for_task290_tellmewhy_answerability_classification": 52.4667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_f1_for_task329_gap_coreference_resolution": 52.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.6302, + "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 40.7245, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 7.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 76.1802, + "eval_f1_for_task418_persent_title_generation": 28.3674, + "eval_f1_for_task442_com_qa_question_rewriting": 67.3892, + "eval_f1_for_task500_scruples_title_generation": 17.4946, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.644, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 37.0156, + "eval_f1_for_task602_wikitext_title_generation": 14.2249, + "eval_f1_for_task613_liar_keyword_tagging": 16.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 6.2836, + "eval_f1_for_task619_ohsumed_title_generation": 42.9992, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.0357, + "eval_f1_for_task623_ohsumed_keyword_tagging": 66.0, + "eval_f1_for_task640_e_snli_textual_entailment": 2.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 44.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 76.0048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 61.1667, + "eval_f1_for_task670_ambigqa_question_rewriting": 65.7449, + "eval_f1_for_task671_ambigqa_question_rewriting": 30.6861, + "eval_f1_for_task677_ollie_data_to_text": 32.2261, + "eval_f1_for_task738_perspectrum_textual_entailment": 0.0, + "eval_f1_for_task743_eurlex_title_generation": 32.984, + "eval_f1_for_task760_msr_sqa_data_to_text": 7.4898, + "eval_f1_for_task769_qed_title_generation": 81.0645, + "eval_f1_for_task827_copa_cause_effect_classification": 76.0, + "eval_f1_for_task828_copa_cause_effect_classification": 64.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.6667, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, + "eval_f1_for_task890_gwsd_textual_entailment": 10.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.1857, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 64.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 47.637, + "eval_f1_for_task970_sherliic_textual_entailment": 80.0, + "eval_f1_for_textual_entailment": 40.625, + "eval_f1_for_title_generation": 37.0499, + "eval_f1_for_word_analogy": 25.2917, + "eval_gen_len": 7.9606, + "eval_global_step": 5000, + "eval_loss": 1.5354130268096924, + "eval_rouge1": 46.376, + "eval_rouge1_for_answerability_classification": 47.525, + "eval_rouge1_for_cause_effect_classification": 62.537, + "eval_rouge1_for_coreference_resolution": 47.7378, + "eval_rouge1_for_data_to_text": 41.4504, + "eval_rouge1_for_dialogue_act_recognition": 44.4524, + "eval_rouge1_for_grammar_error_correction": 65.5387, + "eval_rouge1_for_keyword_tagging": 58.0719, + "eval_rouge1_for_overlap_extraction": 10.702, + "eval_rouge1_for_question_rewriting": 59.8363, + "eval_rouge1_for_task020_mctaco_answerability_classification": 47.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 57.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 23.3494, + "eval_rouge1_for_task035_winogrande_question_rewriting": 72.8344, + "eval_rouge1_for_task036_qasc_keyword_tagging": 77.0167, + "eval_rouge1_for_task039_qasc_overlap_extraction": 8.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 58.4067, + "eval_rouge1_for_task1152_bard_word_analogy": 12.3333, + "eval_rouge1_for_task1153_bard_word_analogy": 8.0, + "eval_rouge1_for_task1154_bard_word_analogy": 18.0, + "eval_rouge1_for_task1155_bard_word_analogy": 57.0, + "eval_rouge1_for_task1156_bard_word_analogy": 31.0, + "eval_rouge1_for_task1157_bard_word_analogy": 41.0, + "eval_rouge1_for_task1158_bard_word_analogy": 23.0, + "eval_rouge1_for_task1159_bard_word_analogy": 12.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 40.9143, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8325, + "eval_rouge1_for_task121_atomic_question_rewriting": 54.2914, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 36.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.0645, + "eval_rouge1_for_task1344_rte_textual_entailment": 68.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.0176, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.8003, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.5135, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 35.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_rouge1_for_task1407_dart_data_to_text": 26.0335, + "eval_rouge1_for_task1409_dart_data_to_text": 55.6195, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.1181, + "eval_rouge1_for_task1439_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 58.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 39.3472, + "eval_rouge1_for_task1554_scitail_textual_entailment": 61.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9593, + "eval_rouge1_for_task1562_zest_question_rewriting": 60.5909, + "eval_rouge1_for_task1586_scifact_title_generation": 40.5206, + "eval_rouge1_for_task1598_nyc_data_to_text": 6.8464, + "eval_rouge1_for_task1612_sick_textual_entailment": 60.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 88.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.1292, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 49.8062, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1659_billsum_title_generation": 52.0496, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 31.0333, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.1268, + "eval_rouge1_for_task190_snli_textual_entailment": 10.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 39.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 89.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 57.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.6725, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 12.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 17.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 92.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 69.2, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 13.4039, + "eval_rouge1_for_task288_gigaword_title_generation": 33.092, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 58.8245, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rouge1_for_task329_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.4762, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 41.992, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 10.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.342, + "eval_rouge1_for_task418_persent_title_generation": 32.1607, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.373, + "eval_rouge1_for_task500_scruples_title_generation": 20.1665, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.3903, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 37.9968, + "eval_rouge1_for_task602_wikitext_title_generation": 15.453, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.6524, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 10.1005, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.2393, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 66.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 44.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 77.5571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 62.1, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 66.8089, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 31.6305, + "eval_rouge1_for_task677_ollie_data_to_text": 36.4247, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rouge1_for_task743_eurlex_title_generation": 34.596, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.9727, + "eval_rouge1_for_task769_qed_title_generation": 81.0379, + "eval_rouge1_for_task827_copa_cause_effect_classification": 76.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 64.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.6667, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 10.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.1857, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 64.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.0439, + "eval_rouge1_for_task970_sherliic_textual_entailment": 80.0, + "eval_rouge1_for_textual_entailment": 47.5, + "eval_rouge1_for_title_generation": 39.3802, + "eval_rouge1_for_word_analogy": 25.2917, + "eval_rougeL": 44.8939, + "eval_rougeL_for_answerability_classification": 47.525, + "eval_rougeL_for_cause_effect_classification": 62.3246, + "eval_rougeL_for_coreference_resolution": 47.7378, + "eval_rougeL_for_data_to_text": 34.4774, + "eval_rougeL_for_dialogue_act_recognition": 44.4524, + "eval_rougeL_for_grammar_error_correction": 63.7532, + "eval_rougeL_for_keyword_tagging": 57.8624, + "eval_rougeL_for_overlap_extraction": 10.702, + "eval_rougeL_for_question_rewriting": 54.866, + "eval_rougeL_for_task020_mctaco_answerability_classification": 47.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 57.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 22.0812, + "eval_rougeL_for_task035_winogrande_question_rewriting": 62.1158, + "eval_rougeL_for_task036_qasc_keyword_tagging": 77.0167, + "eval_rougeL_for_task039_qasc_overlap_extraction": 8.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.3308, + "eval_rougeL_for_task1152_bard_word_analogy": 12.3333, + "eval_rougeL_for_task1153_bard_word_analogy": 8.0, + "eval_rougeL_for_task1154_bard_word_analogy": 18.0, + "eval_rougeL_for_task1155_bard_word_analogy": 57.0, + "eval_rougeL_for_task1156_bard_word_analogy": 31.0, + "eval_rougeL_for_task1157_bard_word_analogy": 41.0, + "eval_rougeL_for_task1158_bard_word_analogy": 23.0, + "eval_rougeL_for_task1159_bard_word_analogy": 12.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 34.5541, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.0462, + "eval_rougeL_for_task121_atomic_question_rewriting": 50.3138, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 36.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.8259, + "eval_rougeL_for_task1344_rte_textual_entailment": 68.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.8623, + "eval_rougeL_for_task1356_xlsum_title_generation": 20.6701, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.2528, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 35.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.0, + "eval_rougeL_for_task1407_dart_data_to_text": 20.7359, + "eval_rougeL_for_task1409_dart_data_to_text": 45.0957, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.4124, + "eval_rougeL_for_task1439_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 57.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 58.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 34.6022, + "eval_rougeL_for_task1554_scitail_textual_entailment": 61.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.0939, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.9568, + "eval_rougeL_for_task1586_scifact_title_generation": 33.8888, + "eval_rougeL_for_task1598_nyc_data_to_text": 6.8464, + "eval_rougeL_for_task1612_sick_textual_entailment": 60.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 88.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 75.5047, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 40.144, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1659_billsum_title_generation": 48.4296, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 31.0333, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.199, + "eval_rougeL_for_task190_snli_textual_entailment": 10.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 39.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 89.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 57.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.3868, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 12.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 17.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 92.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 69.2, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 13.4039, + "eval_rougeL_for_task288_gigaword_title_generation": 28.8203, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 58.8245, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_rougeL_for_task329_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.4762, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 41.2721, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 10.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.0857, + "eval_rougeL_for_task418_persent_title_generation": 28.6335, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.2977, + "eval_rougeL_for_task500_scruples_title_generation": 18.3206, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.3903, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 36.019, + "eval_rougeL_for_task602_wikitext_title_generation": 15.2149, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.6524, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 9.3332, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.6929, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.0857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 66.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 44.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 77.5571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 62.1, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 65.2519, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 30.0095, + "eval_rougeL_for_task677_ollie_data_to_text": 29.6927, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.55, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.7967, + "eval_rougeL_for_task769_qed_title_generation": 80.5935, + "eval_rougeL_for_task827_copa_cause_effect_classification": 76.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 64.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 3.6667, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 10.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.1857, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 64.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 52.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 36.9717, + "eval_rougeL_for_task970_sherliic_textual_entailment": 80.0, + "eval_rougeL_for_textual_entailment": 47.5, + "eval_rougeL_for_title_generation": 36.1217, + "eval_rougeL_for_word_analogy": 25.2917, + "eval_runtime": 784.1619, + "eval_samples_per_second": 15.188, + "eval_steps_per_second": 0.95, + "step": 5000 + }, + { + "epoch": 1.09, + "step": 5000, + "total_flos": 2.783818148573348e+17, + "train_loss": 1.0683723251342774, + "train_runtime": 34517.1129, + "train_samples_per_second": 2.318, + "train_steps_per_second": 0.145 + } + ], + "max_steps": 5000, + "num_train_epochs": 2, + "total_flos": 2.783818148573348e+17, + "trial_name": null, + "trial_params": null +}