Muennighoff
commited on
Commit
•
62be092
1
Parent(s):
29393c3
Add
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +106 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_1.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_2.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_3.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_4.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_5.json +1 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_2.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- 4b284b17bc4seed3/evaluation/generation/merged.csv +46 -3
- 4b284b17bc4seed3/evaluation/generation/merged.json +1 -1
- 4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- 4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.json +133 -0
- 4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.json +133 -0
- 4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
- 4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
.gitattributes
CHANGED
@@ -598,3 +598,109 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
598 |
4b284b12bc4seed3/evaluation/generation/examples.4b284b12bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
599 |
4b284b12bc4seed3/evaluation/generation/examples.4b284b12bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
600 |
4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
598 |
4b284b12bc4seed3/evaluation/generation/examples.4b284b12bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
599 |
4b284b12bc4seed3/evaluation/generation/examples.4b284b12bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
600 |
4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
601 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
602 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
603 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
604 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
605 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
606 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
607 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
608 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
609 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
610 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
611 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
612 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
613 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
614 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
615 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
616 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
617 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
618 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
619 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
620 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
621 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
622 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
623 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
624 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
625 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
626 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
627 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
628 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
629 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
630 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
631 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
632 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
633 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
634 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
635 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
636 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
637 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
638 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
639 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
640 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
641 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
642 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
643 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
644 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
645 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
646 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
647 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
648 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
649 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
650 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
651 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
652 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
653 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
654 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
655 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
656 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
657 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
658 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
659 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
660 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
661 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
662 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
663 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
664 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
665 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
666 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
667 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
668 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
669 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
670 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
671 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
672 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
673 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
674 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
675 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
676 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
677 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
678 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
679 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
680 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
681 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
682 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
683 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
684 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
685 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
686 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
687 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
688 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
689 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
690 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
691 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
692 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
693 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
694 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
695 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
696 |
+
4b284b28bc4seed4/evaluation/generation/examples.4b284b28bc4seed4_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
697 |
+
4b284b42bc4seed1/evaluation/generation/examples.4b284b42bc4seed1_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
698 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
699 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
700 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
701 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
702 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
703 |
+
4b284b42bc4seed4/evaluation/generation/examples.4b284b42bc4seed4_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
704 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
705 |
+
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
706 |
+
4b284b42bc4seed2/evaluation/generation/examples.4b284b42bc4seed2_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.419099869257768, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.031194484818270595}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07428309105028663, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013421912364638756}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3681872402424481, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005159853930374422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11607434147069716, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001850012647052215}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03414063617858362, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007933999992729109}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17816632883864172, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035533786923895483}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.053578321986389954, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001133087024919444}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07011599452669921, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012215073828548383}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.34670303763093524, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004708835524319321}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10966698573493393, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001691639710103682}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0710048733913006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012664792510428985}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3511669157710232, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004777230946733221}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11093711988762583, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017419311094458187}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.49148117932803576, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03324507460068721}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07902449045214965, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014400750722415926}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.38710322053697205, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005007184653985026}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.12352694244894348, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001906067502643356}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.036715281213131654, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009607779535126237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19100291871237257, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035992814997918465}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05758229272088689, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012545799402892376}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07447122807071402, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013245130142878934}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.36501755504076894, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004584459311617146}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11654156016524217, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017494100681024593}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07548416619016371, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013763939300205914}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.36910030227270246, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0046609020187487835}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1179785006841591, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001814093430670857}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5454434527628271, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.022000087258590196}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07951071501680998, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013359456205039861}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3995230958506392, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005034816997358078}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.12498856968844978, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018098054094220577}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03707635426895073, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008167570156064652}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1992034939045538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036422113394633904}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05860558987165317, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011419392844985948}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07471295258744902, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012223182423721426}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3744384042521063, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004568576393272832}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11746888068908964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016549198819411505}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07594762490697103, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012672858502526187}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3811393237968741, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0047038769161714305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11935873168692221, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017154967515797509}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.556791168026395, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.032317222532058425}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.080998817707546, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013339367163834135}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.41001072494996027, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005030415658363669}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.12736411777875145, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017854249994775197}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03778277803591952, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007985595270354018}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.20751135104918955, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003724857487616047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05987151579611729, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011284434076196772}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07546065667342094, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011740175946367535}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3832768865547933, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004588049116216556}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11884367913833037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015910790592394222}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0770058183518017, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001258314779905608}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3899354662021343, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0046962894721850395}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12102961800817194, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016712189432626576}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6622000128005519, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04388394326637253}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0828504135671057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013617141050727788}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.4253413823950648, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005183982393059217}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.13062641947274276, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018599113152054116}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03910444837960125, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008204438440226741}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.21879925159555091, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003830311541729702}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06215772688428125, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011664907997072857}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07664370158738154, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012087093820958138}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3931352132690314, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004667042410814549}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.12085376540315042, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016483232519714076}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0784559405601766, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001278094865391385}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.4021636336590835, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004801298542115264}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12367283564282297, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001741836937013877}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.18786454670077013, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021735146937252514}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.3096972695063147, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026908794087143395}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.21505528399036353, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018995818395012695}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.046017239861773564, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010383201879430278}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.07638282078411858, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001616116555083565}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0520016479381188, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010115416490534225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.13408781232450467, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015202885672530046}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.228446964266991, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021094393606001564}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.15458892744897648, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012683660303014826}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.17549862722884657, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020277616701441}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2903907813858148, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002551194252900688}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.20107870479610124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017683098199495104}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.5399310295830744, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06335448916317216}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_2.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.19925348839011015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002530887504474253}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.30168216059058767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027584806748195286}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.21431878758710393, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001892231712078584}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.05211637513787623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014101145058240962}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.07661749012514224, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016621817373886419}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.05343405736694957, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010466156834578935}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1459245266787269, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001968097177719896}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.22436913136774292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021845377458775844}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.15648689865161197, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001335577123427596}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.18579433666925044, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023734027386265824}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.28243649699768403, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026008524848487456}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.19998202287730088, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017573384668861053}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.7004264776619973, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04754974260466431}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_3.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1749459104752398, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0028081656356849396}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.25083768002533663, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0032561216516905035}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.18047060140146295, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022363114882498754}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.04472735340979592, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001350826653292456}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06454184644160546, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016287344839170377}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0450401939765468, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010303104556608174}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.13041950681379508, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0022318085027860915}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1890702982418797, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025828526179600634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13350234163346472, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016223374974201662}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.16353530897797455, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002648699146049675}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.23494832208674143, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003071224693002067}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.16855536587916595, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020816012906548086}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.768175532900837, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08791006908529778}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.05841942875795064, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002258669318096189}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.08310445450589375, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028861997102921245}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05748810248970106, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019355199912109056}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.015562598115830875, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010426862288334016}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.021902149806596185, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011840444107333239}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01440686850465203, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006872138582561727}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.04491102015047465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018157746751351956}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0639489574231478, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022694007205090625}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.043426885817304715, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014445186217684237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.054595377345807605, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002130882878195972}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.07756220488125122, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026983682067508114}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.053572124748162045, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018012586971119814}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5400251321986815, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04043739724986152}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.009596776100526004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001019209195219191}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.01392559118633662, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0013597144698971257}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.00925886684520902, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008626552575302221}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00278778328805662, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004979552032669205}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.003746712452544001, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005041844367851664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.002342164016882043, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00028524985779443895}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.007610444403310259, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0008454347622960979}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.010924872503483939, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0010786580983304865}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.007183528387342368, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006702843412463305}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.008955989656763915, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0009697015558074734}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.012833001053222909, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0012481388962640927}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008560782240302826, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007971922119836481}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 4.830496766216795e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.3632675428818108e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.6485963049016981, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04147540657533846}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.08118908870859427, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010502348257405204}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.16114628661538824, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0016308230658841419}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.10530094743277582, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011475008410041217}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.011512639257160307, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003958636264012863}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.021242412676126293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006341502248111687}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.014504611707718457, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004328253644535482}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.07887021804923314, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010138195390399778}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.15721608674276338, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016254041219117546}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.10251613030676501, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001133123737831979}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.06955075821996862, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0009278452749297827}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.13846806947227538, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0014005015563785719}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.09019264348297594, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009843878120389428}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.202939466802542, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15557163142979236}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5276213771845283, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031644402162150204}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4199128051324714, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029363725065963847}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.44242905190492954, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002297039766149727}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.24348133396318822, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0024486552475781556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.19171502531070608, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002076856678425889}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20190483168073992, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019079943047259457}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.38160396282429127, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027868687264562394}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.30167752620089633, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002395127408402739}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.31831466008523984, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020089558605603224}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.42898878033348153, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030397544234292777}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3404134846400345, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026908887018321018}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3589538427898855, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022583552169404796}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.224489183075699, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1542919629511485}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5552791226061188, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003254774499994265}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.44192153304211734, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002841866842369357}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.46712866601497155, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002228929211920135}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.27559407981411804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026840620337579744}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2160232963974258, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021622070141122758}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.22826333671399182, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001982493892067202}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.41343793780549115, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029673993676579053}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3270153550649051, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002419997997789347}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.34607223155387784, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020499510521957863}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4622035845427888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031692057784133473}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.36668222287547403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026606231763240088}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3879557827136412, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002246731755612453}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 13.908159500411205, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.20174014620170244}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5586145393847135, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032093462628881024}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4479253835621662, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027861082552568537}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.47315087182166937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021809395838394297}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2811326577572755, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002660259888528566}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22237355181607343, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021928806781846234}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23487744324001114, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020072755600355594}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.41974248298297395, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029716355503285126}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.33479036646237736, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024152288975060475}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.354131271065786, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020718253695416254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4690248076511698, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031541833691154436}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3759227050158839, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00267463975539547}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3970905326362423, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022642244285014093}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.074485969366448, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15597466693230966}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5599161256832338, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032425821310525698}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4451384314886588, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002764740052967528}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.47347311435925227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022379801388622295}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28322196876096284, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002731652037279103}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22202287091247278, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002200228535591399}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23627847944239633, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002068806847352114}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4228642176471588, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002955640491032234}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3350162539475165, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023931644525465464}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3565888842432221, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020875325881162836}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.47254222967645987, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003172921364352923}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3759274566672933, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026693043199832616}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3997926861751811, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023223701465468023}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.100673417779602, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1700464491733536}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5629367404882956, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003237018756314601}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4475117440378833, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027438170780577564}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4763097997248468, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002217032057449442}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.28412809399742106, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002691518203387113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22326882970173134, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022049714378463655}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.23752008995458807, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020632823792997718}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4265229770672075, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029468709368597047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3384047369164962, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002443108834356436}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.360217577146398, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002116681014730912}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.47658389994415734, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003163890773620455}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.37891160781444594, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002667873296658845}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.40315853166110005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022934508658619883}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13046457384320914, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001899307370424846}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.32113789701215495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004337639576472594}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18340013205662573, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0025516121371784496}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02886080390581479, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001078544714447002}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07345674571988704, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002741461964189982}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04096305934583588, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015175654731468782}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10169303665196328, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001434974699819564}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.252942835024364, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034868230588887836}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1433098805171502, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019504428531146873}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10403633135467649, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015254340828458335}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.25849490865028, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036741939807784555}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1466246543732191, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020796489449315457}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.6877533563889122, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09478328567541407}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_2.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13198766549097124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018585101946349278}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3253291743058924, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004286918895481337}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18556704305676935, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024908334509911776}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02998125544073759, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010790606390901271}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07619626643473272, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00283206081800297}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04250852041968189, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015273224602859812}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10428336846259101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001428543354863552}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2586407657468338, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034290843517090206}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.14684147138930212, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019308071156994384}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10568850249903466, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015149805955265887}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.26320724362867987, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003734496619168348}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14897921374822085, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002065878860703936}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.7177243215516453, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09757791560901401}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_3.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.12893858110935913, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002120552918947113}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3041357783592899, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004569246127758115}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.17738625476286662, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027024391420201748}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.026842996488638495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010318732158795176}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06597251723951142, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002542624442472336}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.037421370357715276, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014052682495280058}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09937263586580253, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001551269457995846}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.23696534016153958, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035807139002337796}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1371693458674969, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020218247744835465}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10253669977784535, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016793421052154036}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.244701923743446, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003884290828497489}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14162289632540054, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002208401145989062}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.5871077824548825, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09086334216154317}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_4.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.04080971503303662, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024653466581541048}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.07559502848935112, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00431085336204459}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.04854561922260044, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002712085047341467}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.008256325324203758, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008111140318720209}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.01667875950715412, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014867298732535266}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.010329381513392572, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009258366777536868}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03226725296426277, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001985912844697581}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.05933716580880214, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00339176724274709}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.03801311674055581, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021161212991394045}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03333036255441974, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002071203320532273}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.06159419431363466, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035992824416162072}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0393740289151267, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002236920778294106}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7195399308964131, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08904444517578003}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/agg.4b284b17bc4seed3_gem_xsum_article_DOC_summary_5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0027962678405899387, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008136665421638554}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0024018524920290463, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00069708293480438}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.002542484098776228, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000736067844289626}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.00042489265574726173, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0002486632131064098}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0003552108269089401, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002257173323273388}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.000384360030304287, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00023575063814645501}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0021054779981265173, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006041114084163064}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.001882860190134431, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005579442723360266}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.001952863273498143, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005672979405586935}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002344649755421413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006798799021959105}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.002067860375134616, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0006068459708856913}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0021605449739656364, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.000627676699804427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.0988877305316802e-38, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 6.623676181861596e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abe5e2189c569f84a8b4848921fd5e11e7b12786290fca90e5b515365c2b7e01
|
3 |
+
size 5095388
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be61d2f963416eb52e0ec55e94b764d5d0f14d1ea9e45149d1e2b02879c75634
|
3 |
+
size 5973869
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbd1014db01636e01e0a22b180d951e490407d8c8d561063980a0e59d8559353
|
3 |
+
size 6888051
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2af0550ab7dcf1b2778d071ca5aa0ef1f4f9475834bf21f5c4d76cdc875bff14
|
3 |
+
size 7784988
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:086121e5f14be6a33a12210f34e744ed1b1319572880d3326fcf91eeb2fd9f47
|
3 |
+
size 8711263
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc0dcf1cdf48681406d621cd3c6e20abe48bc5ab6915e5bd0d803604c466ef40
|
3 |
+
size 13318518
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ed212e7fc25e4b23edcade5c02b3a0ff851489272a2340141e2e347bf9bc255
|
3 |
+
size 18896910
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_3.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c67e0bb0c43ac59a43e21481fec38a9fdb1e13a4842c12a60bc93c3368e5110e
|
3 |
+
size 24313615
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79f4349aa4fdcbeca9d2e2a1defb04cd09505c56834269fc7abe6e9b8842655a
|
3 |
+
size 29463258
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76ce7cab3c411193d80b5f12426df1e2eb0e8722014c8665dedfd55efe529ea8
|
3 |
+
size 34798381
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78b2699ebffd9db1c8bb130d1e7b56244e51fd3ccaea50f7edbe786e72ed491b
|
3 |
+
size 4497686
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8384fc07b7c5396b33bcc29b2599b5d06ebc19df095c4036ab167984976c7361
|
3 |
+
size 5031465
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3231fe15bc4fd584bfe81e146edc7c317605d2dcd75df27192ccbf3da50a1d0
|
3 |
+
size 6114722
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da39402964d178b5a3ff868bceae60aff7a63a206e53c1f69d873bf33029447f
|
3 |
+
size 7196260
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8daf12d350ff9d55b64399042afaa66fed29e3d3fe98f6227e7e4265b313aae8
|
3 |
+
size 8271318
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ce68209732aae4cb58d941680eedf1fc6a253cea4d3fa863bfdea5aff97354e
|
3 |
+
size 9357721
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d58a02427cbba90d6bf2f49f5d29468a50ae30466ab53dddf35b3f8d975d48a
|
3 |
+
size 5104108
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_2.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72d6bd058847ecc552d4dec7a2fa901e1430815261d70c22d4aebdcc6b424761
|
3 |
+
size 7379348
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_3.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67223e352de6226d97715edf5065d4768f43df1fffe2ebf79cee2ab4e559b86f
|
3 |
+
size 9645214
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_4.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ff8265e5064a4754896feda5e1223ec6701dee43a21e7ad2740c09e7e2e6922
|
3 |
+
size 11671346
|
4b284b17bc4seed3/evaluation/generation/examples.4b284b17bc4seed3_gem_xsum_article_DOC_summary_5.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d76ad6418c1a4a01cbb65c008f8dda6efc211a6505eff818a8aaf709a2ef95e
|
3 |
+
size 13897521
|
4b284b17bc4seed3/evaluation/generation/merged.csv
CHANGED
@@ -1,10 +1,53 @@
|
|
1 |
dataset,fewshots,prompt,metric,value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.046452959321020074
|
3 |
gem_xsum,0,median,rouge2_fmeasure,0.046452959321020074
|
4 |
-
gem_xsum,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05417783468044965
|
6 |
web_nlg_en,0,median,rouge2_fmeasure,0.05417783468044965
|
7 |
-
web_nlg_en,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03539075879427683
|
9 |
wiki_lingua_en,0,median,rouge2_fmeasure,0.03539075879427683
|
10 |
-
wiki_lingua_en,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
dataset,fewshots,prompt,metric,value
|
2 |
+
e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.014504611707718457
|
3 |
+
e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.014504611707718457
|
4 |
+
e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.20190483168073992
|
5 |
+
e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.20190483168073992
|
6 |
+
e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.22826333671399182
|
7 |
+
e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.22826333671399182
|
8 |
+
e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.23487744324001114
|
9 |
+
e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.23487744324001114
|
10 |
+
e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.23627847944239633
|
11 |
+
e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.23627847944239633
|
12 |
+
e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.23752008995458807
|
13 |
+
e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.23752008995458807
|
14 |
+
e2e_nlg_cleaned,5,average,multiple,0.1922247987899076
|
15 |
gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.046452959321020074
|
16 |
gem_xsum,0,median,rouge2_fmeasure,0.046452959321020074
|
17 |
+
gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.04096305934583588
|
18 |
+
gem_xsum,1,median,rouge2_fmeasure,0.04096305934583588
|
19 |
+
gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.04250852041968189
|
20 |
+
gem_xsum,2,median,rouge2_fmeasure,0.04250852041968189
|
21 |
+
gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.037421370357715276
|
22 |
+
gem_xsum,3,median,rouge2_fmeasure,0.037421370357715276
|
23 |
+
gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.010329381513392572
|
24 |
+
gem_xsum,4,median,rouge2_fmeasure,0.010329381513392572
|
25 |
+
gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.000384360030304287
|
26 |
+
gem_xsum,5,median,rouge2_fmeasure,0.000384360030304287
|
27 |
+
gem_xsum,5,average,multiple,0.029676608497991662
|
28 |
web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.05417783468044965
|
29 |
web_nlg_en,0,median,rouge2_fmeasure,0.05417783468044965
|
30 |
+
web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.053578321986389954
|
31 |
+
web_nlg_en,1,median,rouge2_fmeasure,0.053578321986389954
|
32 |
+
web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.05758229272088689
|
33 |
+
web_nlg_en,2,median,rouge2_fmeasure,0.05758229272088689
|
34 |
+
web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.05860558987165317
|
35 |
+
web_nlg_en,3,median,rouge2_fmeasure,0.05860558987165317
|
36 |
+
web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.05987151579611729
|
37 |
+
web_nlg_en,4,median,rouge2_fmeasure,0.05987151579611729
|
38 |
+
web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.06215772688428125
|
39 |
+
web_nlg_en,5,median,rouge2_fmeasure,0.06215772688428125
|
40 |
+
web_nlg_en,5,average,multiple,0.0576622136566297
|
41 |
wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03539075879427683
|
42 |
wiki_lingua_en,0,median,rouge2_fmeasure,0.03539075879427683
|
43 |
+
wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.0520016479381188
|
44 |
+
wiki_lingua_en,1,median,rouge2_fmeasure,0.0520016479381188
|
45 |
+
wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.05343405736694957
|
46 |
+
wiki_lingua_en,2,median,rouge2_fmeasure,0.05343405736694957
|
47 |
+
wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.0450401939765468
|
48 |
+
wiki_lingua_en,3,median,rouge2_fmeasure,0.0450401939765468
|
49 |
+
wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01440686850465203
|
50 |
+
wiki_lingua_en,4,median,rouge2_fmeasure,0.01440686850465203
|
51 |
+
wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.002342164016882043
|
52 |
+
wiki_lingua_en,5,median,rouge2_fmeasure,0.002342164016882043
|
53 |
+
wiki_lingua_en,5,average,multiple,0.03376928176623768
|
4b284b17bc4seed3/evaluation/generation/merged.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.38814184794485884, "bleu_stderr": 0.05047480258801663, "rouge1_fmeasure": 0.11509783418657757, "rouge1_fmeasure_stderr": 0.0020703242193275064, "rouge1_precision": 0.07565066920874082, "rouge1_precision_stderr": 0.0015551905322164353, "rouge1_recall": 0.3238815837744594, "rouge1_recall_stderr": 0.004766360419359012, "rouge2_fmeasure": 0.05417783468044965, "rouge2_fmeasure_stderr": 0.0012997434401208422, "rouge2_precision": 0.035464553016307084, "rouge2_precision_stderr": 0.0009537389454644533, "rouge2_recall": 0.15704909118734803, "rouge2_recall_stderr": 0.0032984157050335527, "rougeL_fmeasure": 0.111131126212517, "rougeL_fmeasure_stderr": 0.001936148683137272, "rougeL_precision": 0.07280668033107564, "rougeL_precision_stderr": 0.0014362151271666136, "rougeL_recall": 0.315212302235133, "rougeL_recall_stderr": 0.0046596548650644975, "rougeLsum_fmeasure": 0.11011217201582478, "rougeLsum_fmeasure_stderr": 0.0019522649157393596, "rougeLsum_precision": 0.07237282830314168, "rougeLsum_precision_stderr": 0.0014666510212682613, "rougeLsum_recall": 0.3093807689813474, "rougeLsum_recall_stderr": 0.004471237151849855}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4721568574077277, "bleu_stderr": 0.039649110894939166, "rouge1_fmeasure": 0.17714731434706937, "rouge1_fmeasure_stderr": 0.0017820831483570008, "rouge1_precision": 0.15160180317053568, "rouge1_precision_stderr": 0.001846868104763307, "rouge1_recall": 0.2579236357228536, "rouge1_recall_stderr": 0.002563846709803964, "rouge2_fmeasure": 0.03539075879427683, "rouge2_fmeasure_stderr": 0.000818749326693359, "rouge2_precision": 0.030188268597376115, "rouge2_precision_stderr": 0.0007473222285048291, "rouge2_recall": 0.05301218199575937, "rouge2_recall_stderr": 0.0013343798734673441, "rougeL_fmeasure": 0.13824591665193203, "rougeL_fmeasure_stderr": 0.0012760816451838966, "rougeL_precision": 0.11690294044579747, "rougeL_precision_stderr": 0.0012937532849923467, "rougeL_recall": 0.20645902517477255, "rougeL_recall_stderr": 0.0021105218188585406, "rougeLsum_fmeasure": 0.16318126157866722, "rougeLsum_fmeasure_stderr": 0.001624188114564557, "rougeLsum_precision": 0.13937660458718434, "rougeLsum_precision_stderr": 0.001680447465334119, "rougeLsum_recall": 0.23852271853532409, "rougeLsum_recall_stderr": 0.0023644372828476343}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.9143202589684163, "bleu_stderr": 0.05580184327046581, "rouge1_fmeasure": 0.20470721378981208, "rouge1_fmeasure_stderr": 0.0025226661937158843, "rouge1_precision": 0.1557902081027143, "rouge1_precision_stderr": 0.0022032876149756923, "rouge1_recall": 0.3350673880900352, "rouge1_recall_stderr": 0.004415721805932336, "rouge2_fmeasure": 0.046452959321020074, "rouge2_fmeasure_stderr": 0.0015784279302210721, "rouge2_precision": 0.03445353133744446, "rouge2_precision_stderr": 0.0012154256732747522, "rouge2_recall": 0.0798147637879874, "rouge2_recall_stderr": 0.002874504215431858, "rougeL_fmeasure": 0.15701792133721412, "rougeL_fmeasure_stderr": 0.001947718861021701, "rougeL_precision": 0.1191156380371497, "rougeL_precision_stderr": 0.001689615531792646, "rougeL_recall": 0.2591701921041493, "rougeL_recall_stderr": 0.0035902067326739374, "rougeLsum_fmeasure": 0.16148501668654708, "rougeLsum_fmeasure_stderr": 0.002159848508290731, "rougeLsum_precision": 0.12234454178265489, "rougeLsum_precision_stderr": 0.0018119505282670214, "rougeLsum_recall": 0.26657728841908296, "rougeLsum_recall_stderr": 0.003920062645044777}}}}
|
|
|
1 |
+
{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.38814184794485884, "bleu_stderr": 0.05047480258801663, "rouge1_fmeasure": 0.11509783418657757, "rouge1_fmeasure_stderr": 0.0020703242193275064, "rouge1_precision": 0.07565066920874082, "rouge1_precision_stderr": 0.0015551905322164353, "rouge1_recall": 0.3238815837744594, "rouge1_recall_stderr": 0.004766360419359012, "rouge2_fmeasure": 0.05417783468044965, "rouge2_fmeasure_stderr": 0.0012997434401208422, "rouge2_precision": 0.035464553016307084, "rouge2_precision_stderr": 0.0009537389454644533, "rouge2_recall": 0.15704909118734803, "rouge2_recall_stderr": 0.0032984157050335527, "rougeL_fmeasure": 0.111131126212517, "rougeL_fmeasure_stderr": 0.001936148683137272, "rougeL_precision": 0.07280668033107564, "rougeL_precision_stderr": 0.0014362151271666136, "rougeL_recall": 0.315212302235133, "rougeL_recall_stderr": 0.0046596548650644975, "rougeLsum_fmeasure": 0.11011217201582478, "rougeLsum_fmeasure_stderr": 0.0019522649157393596, "rougeLsum_precision": 0.07237282830314168, "rougeLsum_precision_stderr": 0.0014666510212682613, "rougeLsum_recall": 0.3093807689813474, "rougeLsum_recall_stderr": 0.004471237151849855}}, "1": {"PALM_prompt": {"bleu": 0.419099869257768, "bleu_stderr": 0.031194484818270595, "rouge1_fmeasure": 0.11607434147069716, "rouge1_fmeasure_stderr": 0.001850012647052215, "rouge1_precision": 0.07428309105028663, "rouge1_precision_stderr": 0.0013421912364638756, "rouge1_recall": 0.3681872402424481, "rouge1_recall_stderr": 0.005159853930374422, "rouge2_fmeasure": 0.053578321986389954, "rouge2_fmeasure_stderr": 0.001133087024919444, "rouge2_precision": 0.03414063617858362, "rouge2_precision_stderr": 0.0007933999992729109, "rouge2_recall": 0.17816632883864172, "rouge2_recall_stderr": 0.0035533786923895483, "rougeL_fmeasure": 0.10966698573493393, "rougeL_fmeasure_stderr": 0.001691639710103682, "rougeL_precision": 0.07011599452669921, "rougeL_precision_stderr": 0.0012215073828548383, "rougeL_recall": 0.34670303763093524, "rougeL_recall_stderr": 0.004708835524319321, "rougeLsum_fmeasure": 0.11093711988762583, "rougeLsum_fmeasure_stderr": 0.0017419311094458187, "rougeLsum_precision": 0.0710048733913006, "rougeLsum_precision_stderr": 0.0012664792510428985, "rougeLsum_recall": 0.3511669157710232, "rougeLsum_recall_stderr": 0.004777230946733221}}, "2": {"PALM_prompt": {"bleu": 0.49148117932803576, "bleu_stderr": 0.03324507460068721, "rouge1_fmeasure": 0.12352694244894348, "rouge1_fmeasure_stderr": 0.001906067502643356, "rouge1_precision": 0.07902449045214965, "rouge1_precision_stderr": 0.0014400750722415926, "rouge1_recall": 0.38710322053697205, "rouge1_recall_stderr": 0.005007184653985026, "rouge2_fmeasure": 0.05758229272088689, "rouge2_fmeasure_stderr": 0.0012545799402892376, "rouge2_precision": 0.036715281213131654, "rouge2_precision_stderr": 0.0009607779535126237, "rouge2_recall": 0.19100291871237257, "rouge2_recall_stderr": 0.0035992814997918465, "rougeL_fmeasure": 0.11654156016524217, "rougeL_fmeasure_stderr": 0.0017494100681024593, "rougeL_precision": 0.07447122807071402, "rougeL_precision_stderr": 0.0013245130142878934, "rougeL_recall": 0.36501755504076894, "rougeL_recall_stderr": 0.004584459311617146, "rougeLsum_fmeasure": 0.1179785006841591, "rougeLsum_fmeasure_stderr": 0.001814093430670857, "rougeLsum_precision": 0.07548416619016371, "rougeLsum_precision_stderr": 0.0013763939300205914, "rougeLsum_recall": 0.36910030227270246, "rougeLsum_recall_stderr": 0.0046609020187487835}}, "3": {"PALM_prompt": {"bleu": 0.5454434527628271, "bleu_stderr": 0.022000087258590196, "rouge1_fmeasure": 0.12498856968844978, "rouge1_fmeasure_stderr": 0.0018098054094220577, "rouge1_precision": 0.07951071501680998, "rouge1_precision_stderr": 0.0013359456205039861, "rouge1_recall": 0.3995230958506392, "rouge1_recall_stderr": 0.005034816997358078, "rouge2_fmeasure": 0.05860558987165317, "rouge2_fmeasure_stderr": 0.0011419392844985948, "rouge2_precision": 0.03707635426895073, "rouge2_precision_stderr": 0.0008167570156064652, "rouge2_recall": 0.1992034939045538, "rouge2_recall_stderr": 0.0036422113394633904, "rougeL_fmeasure": 0.11746888068908964, "rougeL_fmeasure_stderr": 0.0016549198819411505, "rougeL_precision": 0.07471295258744902, "rougeL_precision_stderr": 0.0012223182423721426, "rougeL_recall": 0.3744384042521063, "rougeL_recall_stderr": 0.004568576393272832, "rougeLsum_fmeasure": 0.11935873168692221, "rougeLsum_fmeasure_stderr": 0.0017154967515797509, "rougeLsum_precision": 0.07594762490697103, "rougeLsum_precision_stderr": 0.0012672858502526187, "rougeLsum_recall": 0.3811393237968741, "rougeLsum_recall_stderr": 0.0047038769161714305}}, "4": {"PALM_prompt": {"bleu": 0.556791168026395, "bleu_stderr": 0.032317222532058425, "rouge1_fmeasure": 0.12736411777875145, "rouge1_fmeasure_stderr": 0.0017854249994775197, "rouge1_precision": 0.080998817707546, "rouge1_precision_stderr": 0.0013339367163834135, "rouge1_recall": 0.41001072494996027, "rouge1_recall_stderr": 0.005030415658363669, "rouge2_fmeasure": 0.05987151579611729, "rouge2_fmeasure_stderr": 0.0011284434076196772, "rouge2_precision": 0.03778277803591952, "rouge2_precision_stderr": 0.0007985595270354018, "rouge2_recall": 0.20751135104918955, "rouge2_recall_stderr": 0.003724857487616047, "rougeL_fmeasure": 0.11884367913833037, "rougeL_fmeasure_stderr": 0.0015910790592394222, "rougeL_precision": 0.07546065667342094, "rougeL_precision_stderr": 0.0011740175946367535, "rougeL_recall": 0.3832768865547933, "rougeL_recall_stderr": 0.004588049116216556, "rougeLsum_fmeasure": 0.12102961800817194, "rougeLsum_fmeasure_stderr": 0.0016712189432626576, "rougeLsum_precision": 0.0770058183518017, "rougeLsum_precision_stderr": 0.001258314779905608, "rougeLsum_recall": 0.3899354662021343, "rougeLsum_recall_stderr": 0.0046962894721850395}}, "5": {"PALM_prompt": {"bleu": 0.6622000128005519, "bleu_stderr": 0.04388394326637253, "rouge1_fmeasure": 0.13062641947274276, "rouge1_fmeasure_stderr": 0.0018599113152054116, "rouge1_precision": 0.0828504135671057, "rouge1_precision_stderr": 0.0013617141050727788, "rouge1_recall": 0.4253413823950648, "rouge1_recall_stderr": 0.005183982393059217, "rouge2_fmeasure": 0.06215772688428125, "rouge2_fmeasure_stderr": 0.0011664907997072857, "rouge2_precision": 0.03910444837960125, "rouge2_precision_stderr": 0.0008204438440226741, "rouge2_recall": 0.21879925159555091, "rouge2_recall_stderr": 0.003830311541729702, "rougeL_fmeasure": 0.12085376540315042, "rougeL_fmeasure_stderr": 0.0016483232519714076, "rougeL_precision": 0.07664370158738154, "rougeL_precision_stderr": 0.0012087093820958138, "rougeL_recall": 0.3931352132690314, "rougeL_recall_stderr": 0.004667042410814549, "rougeLsum_fmeasure": 0.12367283564282297, "rougeLsum_fmeasure_stderr": 0.001741836937013877, "rougeLsum_precision": 0.0784559405601766, "rougeLsum_precision_stderr": 0.001278094865391385, "rougeLsum_recall": 0.4021636336590835, "rougeLsum_recall_stderr": 0.004801298542115264}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4721568574077277, "bleu_stderr": 0.039649110894939166, "rouge1_fmeasure": 0.17714731434706937, "rouge1_fmeasure_stderr": 0.0017820831483570008, "rouge1_precision": 0.15160180317053568, "rouge1_precision_stderr": 0.001846868104763307, "rouge1_recall": 0.2579236357228536, "rouge1_recall_stderr": 0.002563846709803964, "rouge2_fmeasure": 0.03539075879427683, "rouge2_fmeasure_stderr": 0.000818749326693359, "rouge2_precision": 0.030188268597376115, "rouge2_precision_stderr": 0.0007473222285048291, "rouge2_recall": 0.05301218199575937, "rouge2_recall_stderr": 0.0013343798734673441, "rougeL_fmeasure": 0.13824591665193203, "rougeL_fmeasure_stderr": 0.0012760816451838966, "rougeL_precision": 0.11690294044579747, "rougeL_precision_stderr": 0.0012937532849923467, "rougeL_recall": 0.20645902517477255, "rougeL_recall_stderr": 0.0021105218188585406, "rougeLsum_fmeasure": 0.16318126157866722, "rougeLsum_fmeasure_stderr": 0.001624188114564557, "rougeLsum_precision": 0.13937660458718434, "rougeLsum_precision_stderr": 0.001680447465334119, "rougeLsum_recall": 0.23852271853532409, "rougeLsum_recall_stderr": 0.0023644372828476343}}, "1": {"tldr_en": {"bleu": 2.5399310295830744, "bleu_stderr": 0.06335448916317216, "rouge1_fmeasure": 0.21505528399036353, "rouge1_fmeasure_stderr": 0.0018995818395012695, "rouge1_precision": 0.18786454670077013, "rouge1_precision_stderr": 0.0021735146937252514, "rouge1_recall": 0.3096972695063147, "rouge1_recall_stderr": 0.0026908794087143395, "rouge2_fmeasure": 0.0520016479381188, "rouge2_fmeasure_stderr": 0.0010115416490534225, "rouge2_precision": 0.046017239861773564, "rouge2_precision_stderr": 0.0010383201879430278, "rouge2_recall": 0.07638282078411858, "rouge2_recall_stderr": 0.001616116555083565, "rougeL_fmeasure": 0.15458892744897648, "rougeL_fmeasure_stderr": 0.0012683660303014826, "rougeL_precision": 0.13408781232450467, "rougeL_precision_stderr": 0.0015202885672530046, "rougeL_recall": 0.228446964266991, "rougeL_recall_stderr": 0.0021094393606001564, "rougeLsum_fmeasure": 0.20107870479610124, "rougeLsum_fmeasure_stderr": 0.0017683098199495104, "rougeLsum_precision": 0.17549862722884657, "rougeLsum_precision_stderr": 0.0020277616701441, "rougeLsum_recall": 0.2903907813858148, "rougeLsum_recall_stderr": 0.002551194252900688}}, "2": {"tldr_en": {"bleu": 2.7004264776619973, "bleu_stderr": 0.04754974260466431, "rouge1_fmeasure": 0.21431878758710393, "rouge1_fmeasure_stderr": 0.001892231712078584, "rouge1_precision": 0.19925348839011015, "rouge1_precision_stderr": 0.002530887504474253, "rouge1_recall": 0.30168216059058767, "rouge1_recall_stderr": 0.0027584806748195286, "rouge2_fmeasure": 0.05343405736694957, "rouge2_fmeasure_stderr": 0.0010466156834578935, "rouge2_precision": 0.05211637513787623, "rouge2_precision_stderr": 0.0014101145058240962, "rouge2_recall": 0.07661749012514224, "rouge2_recall_stderr": 0.0016621817373886419, "rougeL_fmeasure": 0.15648689865161197, "rougeL_fmeasure_stderr": 0.001335577123427596, "rougeL_precision": 0.1459245266787269, "rougeL_precision_stderr": 0.001968097177719896, "rougeL_recall": 0.22436913136774292, "rougeL_recall_stderr": 0.0021845377458775844, "rougeLsum_fmeasure": 0.19998202287730088, "rougeLsum_fmeasure_stderr": 0.0017573384668861053, "rougeLsum_precision": 0.18579433666925044, "rougeLsum_precision_stderr": 0.0023734027386265824, "rougeLsum_recall": 0.28243649699768403, "rougeLsum_recall_stderr": 0.0026008524848487456}}, "3": {"tldr_en": {"bleu": 2.768175532900837, "bleu_stderr": 0.08791006908529778, "rouge1_fmeasure": 0.18047060140146295, "rouge1_fmeasure_stderr": 0.0022363114882498754, "rouge1_precision": 0.1749459104752398, "rouge1_precision_stderr": 0.0028081656356849396, "rouge1_recall": 0.25083768002533663, "rouge1_recall_stderr": 0.0032561216516905035, "rouge2_fmeasure": 0.0450401939765468, "rouge2_fmeasure_stderr": 0.0010303104556608174, "rouge2_precision": 0.04472735340979592, "rouge2_precision_stderr": 0.001350826653292456, "rouge2_recall": 0.06454184644160546, "rouge2_recall_stderr": 0.0016287344839170377, "rougeL_fmeasure": 0.13350234163346472, "rougeL_fmeasure_stderr": 0.0016223374974201662, "rougeL_precision": 0.13041950681379508, "rougeL_precision_stderr": 0.0022318085027860915, "rougeL_recall": 0.1890702982418797, "rougeL_recall_stderr": 0.0025828526179600634, "rougeLsum_fmeasure": 0.16855536587916595, "rougeLsum_fmeasure_stderr": 0.0020816012906548086, "rougeLsum_precision": 0.16353530897797455, "rougeLsum_precision_stderr": 0.002648699146049675, "rougeLsum_recall": 0.23494832208674143, "rougeLsum_recall_stderr": 0.003071224693002067}}, "4": {"tldr_en": {"bleu": 0.5400251321986815, "bleu_stderr": 0.04043739724986152, "rouge1_fmeasure": 0.05748810248970106, "rouge1_fmeasure_stderr": 0.0019355199912109056, "rouge1_precision": 0.05841942875795064, "rouge1_precision_stderr": 0.002258669318096189, "rouge1_recall": 0.08310445450589375, "rouge1_recall_stderr": 0.0028861997102921245, "rouge2_fmeasure": 0.01440686850465203, "rouge2_fmeasure_stderr": 0.0006872138582561727, "rouge2_precision": 0.015562598115830875, "rouge2_precision_stderr": 0.0010426862288334016, "rouge2_recall": 0.021902149806596185, "rouge2_recall_stderr": 0.0011840444107333239, "rougeL_fmeasure": 0.043426885817304715, "rougeL_fmeasure_stderr": 0.0014445186217684237, "rougeL_precision": 0.04491102015047465, "rougeL_precision_stderr": 0.0018157746751351956, "rougeL_recall": 0.0639489574231478, "rougeL_recall_stderr": 0.0022694007205090625, "rougeLsum_fmeasure": 0.053572124748162045, "rougeLsum_fmeasure_stderr": 0.0018012586971119814, "rougeLsum_precision": 0.054595377345807605, "rougeLsum_precision_stderr": 0.002130882878195972, "rougeLsum_recall": 0.07756220488125122, "rougeLsum_recall_stderr": 0.0026983682067508114}}, "5": {"tldr_en": {"bleu": 4.830496766216795e-07, "bleu_stderr": 1.3632675428818108e-06, "rouge1_fmeasure": 0.00925886684520902, "rouge1_fmeasure_stderr": 0.0008626552575302221, "rouge1_precision": 0.009596776100526004, "rouge1_precision_stderr": 0.001019209195219191, "rouge1_recall": 0.01392559118633662, "rouge1_recall_stderr": 0.0013597144698971257, "rouge2_fmeasure": 0.002342164016882043, "rouge2_fmeasure_stderr": 0.00028524985779443895, "rouge2_precision": 0.00278778328805662, "rouge2_precision_stderr": 0.0004979552032669205, "rouge2_recall": 0.003746712452544001, "rouge2_recall_stderr": 0.0005041844367851664, "rougeL_fmeasure": 0.007183528387342368, "rougeL_fmeasure_stderr": 0.0006702843412463305, "rougeL_precision": 0.007610444403310259, "rougeL_precision_stderr": 0.0008454347622960979, "rougeL_recall": 0.010924872503483939, "rougeL_recall_stderr": 0.0010786580983304865, "rougeLsum_fmeasure": 0.008560782240302826, "rougeLsum_fmeasure_stderr": 0.0007971922119836481, "rougeLsum_precision": 0.008955989656763915, "rougeLsum_precision_stderr": 0.0009697015558074734, "rougeLsum_recall": 0.012833001053222909, "rougeLsum_recall_stderr": 0.0012481388962640927}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.6485963049016981, "bleu_stderr": 0.04147540657533846, "rouge1_fmeasure": 0.10530094743277582, "rouge1_fmeasure_stderr": 0.0011475008410041217, "rouge1_precision": 0.08118908870859427, "rouge1_precision_stderr": 0.0010502348257405204, "rouge1_recall": 0.16114628661538824, "rouge1_recall_stderr": 0.0016308230658841419, "rouge2_fmeasure": 0.014504611707718457, "rouge2_fmeasure_stderr": 0.0004328253644535482, "rouge2_precision": 0.011512639257160307, "rouge2_precision_stderr": 0.0003958636264012863, "rouge2_recall": 0.021242412676126293, "rouge2_recall_stderr": 0.0006341502248111687, "rougeL_fmeasure": 0.10251613030676501, "rougeL_fmeasure_stderr": 0.001133123737831979, "rougeL_precision": 0.07887021804923314, "rougeL_precision_stderr": 0.0010138195390399778, "rougeL_recall": 0.15721608674276338, "rougeL_recall_stderr": 0.0016254041219117546, "rougeLsum_fmeasure": 0.09019264348297594, "rougeLsum_fmeasure_stderr": 0.0009843878120389428, "rougeLsum_precision": 0.06955075821996862, "rougeLsum_precision_stderr": 0.0009278452749297827, "rougeLsum_recall": 0.13846806947227538, "rougeLsum_recall_stderr": 0.0014005015563785719}}, "1": {"generate_text_restaurant": {"bleu": 11.202939466802542, "bleu_stderr": 0.15557163142979236, "rouge1_fmeasure": 0.44242905190492954, "rouge1_fmeasure_stderr": 0.002297039766149727, "rouge1_precision": 0.5276213771845283, "rouge1_precision_stderr": 0.0031644402162150204, "rouge1_recall": 0.4199128051324714, "rouge1_recall_stderr": 0.0029363725065963847, "rouge2_fmeasure": 0.20190483168073992, "rouge2_fmeasure_stderr": 0.0019079943047259457, "rouge2_precision": 0.24348133396318822, "rouge2_precision_stderr": 0.0024486552475781556, "rouge2_recall": 0.19171502531070608, "rouge2_recall_stderr": 0.002076856678425889, "rougeL_fmeasure": 0.31831466008523984, "rougeL_fmeasure_stderr": 0.0020089558605603224, "rougeL_precision": 0.38160396282429127, "rougeL_precision_stderr": 0.0027868687264562394, "rougeL_recall": 0.30167752620089633, "rougeL_recall_stderr": 0.002395127408402739, "rougeLsum_fmeasure": 0.3589538427898855, "rougeLsum_fmeasure_stderr": 0.0022583552169404796, "rougeLsum_precision": 0.42898878033348153, "rougeLsum_precision_stderr": 0.0030397544234292777, "rougeLsum_recall": 0.3404134846400345, "rougeLsum_recall_stderr": 0.0026908887018321018}}, "2": {"generate_text_restaurant": {"bleu": 13.224489183075699, "bleu_stderr": 0.1542919629511485, "rouge1_fmeasure": 0.46712866601497155, "rouge1_fmeasure_stderr": 0.002228929211920135, "rouge1_precision": 0.5552791226061188, "rouge1_precision_stderr": 0.003254774499994265, "rouge1_recall": 0.44192153304211734, "rouge1_recall_stderr": 0.002841866842369357, "rouge2_fmeasure": 0.22826333671399182, "rouge2_fmeasure_stderr": 0.001982493892067202, "rouge2_precision": 0.27559407981411804, "rouge2_precision_stderr": 0.0026840620337579744, "rouge2_recall": 0.2160232963974258, "rouge2_recall_stderr": 0.0021622070141122758, "rougeL_fmeasure": 0.34607223155387784, "rougeL_fmeasure_stderr": 0.0020499510521957863, "rougeL_precision": 0.41343793780549115, "rougeL_precision_stderr": 0.0029673993676579053, "rougeL_recall": 0.3270153550649051, "rougeL_recall_stderr": 0.002419997997789347, "rougeLsum_fmeasure": 0.3879557827136412, "rougeLsum_fmeasure_stderr": 0.002246731755612453, "rougeLsum_precision": 0.4622035845427888, "rougeLsum_precision_stderr": 0.0031692057784133473, "rougeLsum_recall": 0.36668222287547403, "rougeLsum_recall_stderr": 0.0026606231763240088}}, "3": {"generate_text_restaurant": {"bleu": 13.908159500411205, "bleu_stderr": 0.20174014620170244, "rouge1_fmeasure": 0.47315087182166937, "rouge1_fmeasure_stderr": 0.0021809395838394297, "rouge1_precision": 0.5586145393847135, "rouge1_precision_stderr": 0.0032093462628881024, "rouge1_recall": 0.4479253835621662, "rouge1_recall_stderr": 0.0027861082552568537, "rouge2_fmeasure": 0.23487744324001114, "rouge2_fmeasure_stderr": 0.0020072755600355594, "rouge2_precision": 0.2811326577572755, "rouge2_precision_stderr": 0.002660259888528566, "rouge2_recall": 0.22237355181607343, "rouge2_recall_stderr": 0.0021928806781846234, "rougeL_fmeasure": 0.354131271065786, "rougeL_fmeasure_stderr": 0.0020718253695416254, "rougeL_precision": 0.41974248298297395, "rougeL_precision_stderr": 0.0029716355503285126, "rougeL_recall": 0.33479036646237736, "rougeL_recall_stderr": 0.0024152288975060475, "rougeLsum_fmeasure": 0.3970905326362423, "rougeLsum_fmeasure_stderr": 0.0022642244285014093, "rougeLsum_precision": 0.4690248076511698, "rougeLsum_precision_stderr": 0.0031541833691154436, "rougeLsum_recall": 0.3759227050158839, "rougeLsum_recall_stderr": 0.00267463975539547}}, "4": {"generate_text_restaurant": {"bleu": 14.074485969366448, "bleu_stderr": 0.15597466693230966, "rouge1_fmeasure": 0.47347311435925227, "rouge1_fmeasure_stderr": 0.0022379801388622295, "rouge1_precision": 0.5599161256832338, "rouge1_precision_stderr": 0.0032425821310525698, "rouge1_recall": 0.4451384314886588, "rouge1_recall_stderr": 0.002764740052967528, "rouge2_fmeasure": 0.23627847944239633, "rouge2_fmeasure_stderr": 0.002068806847352114, "rouge2_precision": 0.28322196876096284, "rouge2_precision_stderr": 0.002731652037279103, "rouge2_recall": 0.22202287091247278, "rouge2_recall_stderr": 0.002200228535591399, "rougeL_fmeasure": 0.3565888842432221, "rougeL_fmeasure_stderr": 0.0020875325881162836, "rougeL_precision": 0.4228642176471588, "rougeL_precision_stderr": 0.002955640491032234, "rougeL_recall": 0.3350162539475165, "rougeL_recall_stderr": 0.0023931644525465464, "rougeLsum_fmeasure": 0.3997926861751811, "rougeLsum_fmeasure_stderr": 0.0023223701465468023, "rougeLsum_precision": 0.47254222967645987, "rougeLsum_precision_stderr": 0.003172921364352923, "rougeLsum_recall": 0.3759274566672933, "rougeLsum_recall_stderr": 0.0026693043199832616}}, "5": {"generate_text_restaurant": {"bleu": 14.100673417779602, "bleu_stderr": 0.1700464491733536, "rouge1_fmeasure": 0.4763097997248468, "rouge1_fmeasure_stderr": 0.002217032057449442, "rouge1_precision": 0.5629367404882956, "rouge1_precision_stderr": 0.003237018756314601, "rouge1_recall": 0.4475117440378833, "rouge1_recall_stderr": 0.0027438170780577564, "rouge2_fmeasure": 0.23752008995458807, "rouge2_fmeasure_stderr": 0.0020632823792997718, "rouge2_precision": 0.28412809399742106, "rouge2_precision_stderr": 0.002691518203387113, "rouge2_recall": 0.22326882970173134, "rouge2_recall_stderr": 0.0022049714378463655, "rougeL_fmeasure": 0.360217577146398, "rougeL_fmeasure_stderr": 0.002116681014730912, "rougeL_precision": 0.4265229770672075, "rougeL_precision_stderr": 0.0029468709368597047, "rougeL_recall": 0.3384047369164962, "rougeL_recall_stderr": 0.002443108834356436, "rougeLsum_fmeasure": 0.40315853166110005, "rougeLsum_fmeasure_stderr": 0.0022934508658619883, "rougeLsum_precision": 0.47658389994415734, "rougeLsum_precision_stderr": 0.003163890773620455, "rougeLsum_recall": 0.37891160781444594, "rougeLsum_recall_stderr": 0.002667873296658845}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.9143202589684163, "bleu_stderr": 0.05580184327046581, "rouge1_fmeasure": 0.20470721378981208, "rouge1_fmeasure_stderr": 0.0025226661937158843, "rouge1_precision": 0.1557902081027143, "rouge1_precision_stderr": 0.0022032876149756923, "rouge1_recall": 0.3350673880900352, "rouge1_recall_stderr": 0.004415721805932336, "rouge2_fmeasure": 0.046452959321020074, "rouge2_fmeasure_stderr": 0.0015784279302210721, "rouge2_precision": 0.03445353133744446, "rouge2_precision_stderr": 0.0012154256732747522, "rouge2_recall": 0.0798147637879874, "rouge2_recall_stderr": 0.002874504215431858, "rougeL_fmeasure": 0.15701792133721412, "rougeL_fmeasure_stderr": 0.001947718861021701, "rougeL_precision": 0.1191156380371497, "rougeL_precision_stderr": 0.001689615531792646, "rougeL_recall": 0.2591701921041493, "rougeL_recall_stderr": 0.0035902067326739374, "rougeLsum_fmeasure": 0.16148501668654708, "rougeLsum_fmeasure_stderr": 0.002159848508290731, "rougeLsum_precision": 0.12234454178265489, "rougeLsum_precision_stderr": 0.0018119505282670214, "rougeLsum_recall": 0.26657728841908296, "rougeLsum_recall_stderr": 0.003920062645044777}}, "1": {"article_DOC_summary": {"bleu": 1.6877533563889122, "bleu_stderr": 0.09478328567541407, "rouge1_fmeasure": 0.18340013205662573, "rouge1_fmeasure_stderr": 0.0025516121371784496, "rouge1_precision": 0.13046457384320914, "rouge1_precision_stderr": 0.001899307370424846, "rouge1_recall": 0.32113789701215495, "rouge1_recall_stderr": 0.004337639576472594, "rouge2_fmeasure": 0.04096305934583588, "rouge2_fmeasure_stderr": 0.0015175654731468782, "rouge2_precision": 0.02886080390581479, "rouge2_precision_stderr": 0.001078544714447002, "rouge2_recall": 0.07345674571988704, "rouge2_recall_stderr": 0.002741461964189982, "rougeL_fmeasure": 0.1433098805171502, "rougeL_fmeasure_stderr": 0.0019504428531146873, "rougeL_precision": 0.10169303665196328, "rougeL_precision_stderr": 0.001434974699819564, "rougeL_recall": 0.252942835024364, "rougeL_recall_stderr": 0.0034868230588887836, "rougeLsum_fmeasure": 0.1466246543732191, "rougeLsum_fmeasure_stderr": 0.0020796489449315457, "rougeLsum_precision": 0.10403633135467649, "rougeLsum_precision_stderr": 0.0015254340828458335, "rougeLsum_recall": 0.25849490865028, "rougeLsum_recall_stderr": 0.0036741939807784555}}, "2": {"article_DOC_summary": {"bleu": 1.7177243215516453, "bleu_stderr": 0.09757791560901401, "rouge1_fmeasure": 0.18556704305676935, "rouge1_fmeasure_stderr": 0.0024908334509911776, "rouge1_precision": 0.13198766549097124, "rouge1_precision_stderr": 0.0018585101946349278, "rouge1_recall": 0.3253291743058924, "rouge1_recall_stderr": 0.004286918895481337, "rouge2_fmeasure": 0.04250852041968189, "rouge2_fmeasure_stderr": 0.0015273224602859812, "rouge2_precision": 0.02998125544073759, "rouge2_precision_stderr": 0.0010790606390901271, "rouge2_recall": 0.07619626643473272, "rouge2_recall_stderr": 0.00283206081800297, "rougeL_fmeasure": 0.14684147138930212, "rougeL_fmeasure_stderr": 0.0019308071156994384, "rougeL_precision": 0.10428336846259101, "rougeL_precision_stderr": 0.001428543354863552, "rougeL_recall": 0.2586407657468338, "rougeL_recall_stderr": 0.0034290843517090206, "rougeLsum_fmeasure": 0.14897921374822085, "rougeLsum_fmeasure_stderr": 0.002065878860703936, "rougeLsum_precision": 0.10568850249903466, "rougeLsum_precision_stderr": 0.0015149805955265887, "rougeLsum_recall": 0.26320724362867987, "rougeLsum_recall_stderr": 0.003734496619168348}}, "3": {"article_DOC_summary": {"bleu": 1.5871077824548825, "bleu_stderr": 0.09086334216154317, "rouge1_fmeasure": 0.17738625476286662, "rouge1_fmeasure_stderr": 0.0027024391420201748, "rouge1_precision": 0.12893858110935913, "rouge1_precision_stderr": 0.002120552918947113, "rouge1_recall": 0.3041357783592899, "rouge1_recall_stderr": 0.004569246127758115, "rouge2_fmeasure": 0.037421370357715276, "rouge2_fmeasure_stderr": 0.0014052682495280058, "rouge2_precision": 0.026842996488638495, "rouge2_precision_stderr": 0.0010318732158795176, "rouge2_recall": 0.06597251723951142, "rouge2_recall_stderr": 0.002542624442472336, "rougeL_fmeasure": 0.1371693458674969, "rougeL_fmeasure_stderr": 0.0020218247744835465, "rougeL_precision": 0.09937263586580253, "rougeL_precision_stderr": 0.001551269457995846, "rougeL_recall": 0.23696534016153958, "rougeL_recall_stderr": 0.0035807139002337796, "rougeLsum_fmeasure": 0.14162289632540054, "rougeLsum_fmeasure_stderr": 0.002208401145989062, "rougeLsum_precision": 0.10253669977784535, "rougeLsum_precision_stderr": 0.0016793421052154036, "rougeLsum_recall": 0.244701923743446, "rougeLsum_recall_stderr": 0.003884290828497489}}, "4": {"article_DOC_summary": {"bleu": 0.7195399308964131, "bleu_stderr": 0.08904444517578003, "rouge1_fmeasure": 0.04854561922260044, "rouge1_fmeasure_stderr": 0.002712085047341467, "rouge1_precision": 0.04080971503303662, "rouge1_precision_stderr": 0.0024653466581541048, "rouge1_recall": 0.07559502848935112, "rouge1_recall_stderr": 0.00431085336204459, "rouge2_fmeasure": 0.010329381513392572, "rouge2_fmeasure_stderr": 0.0009258366777536868, "rouge2_precision": 0.008256325324203758, "rouge2_precision_stderr": 0.0008111140318720209, "rouge2_recall": 0.01667875950715412, "rouge2_recall_stderr": 0.0014867298732535266, "rougeL_fmeasure": 0.03801311674055581, "rougeL_fmeasure_stderr": 0.0021161212991394045, "rougeL_precision": 0.03226725296426277, "rougeL_precision_stderr": 0.001985912844697581, "rougeL_recall": 0.05933716580880214, "rougeL_recall_stderr": 0.00339176724274709, "rougeLsum_fmeasure": 0.0393740289151267, "rougeLsum_fmeasure_stderr": 0.002236920778294106, "rougeLsum_precision": 0.03333036255441974, "rougeLsum_precision_stderr": 0.002071203320532273, "rougeLsum_recall": 0.06159419431363466, "rougeLsum_recall_stderr": 0.0035992824416162072}}, "5": {"article_DOC_summary": {"bleu": 2.0988877305316802e-38, "bleu_stderr": 6.623676181861596e-33, "rouge1_fmeasure": 0.002542484098776228, "rouge1_fmeasure_stderr": 0.000736067844289626, "rouge1_precision": 0.0027962678405899387, "rouge1_precision_stderr": 0.0008136665421638554, "rouge1_recall": 0.0024018524920290463, "rouge1_recall_stderr": 0.00069708293480438, "rouge2_fmeasure": 0.000384360030304287, "rouge2_fmeasure_stderr": 0.00023575063814645501, "rouge2_precision": 0.00042489265574726173, "rouge2_precision_stderr": 0.0002486632131064098, "rouge2_recall": 0.0003552108269089401, "rouge2_recall_stderr": 0.0002257173323273388, "rougeL_fmeasure": 0.001952863273498143, "rougeL_fmeasure_stderr": 0.0005672979405586935, "rougeL_precision": 0.0021054779981265173, "rougeL_precision_stderr": 0.0006041114084163064, "rougeL_recall": 0.001882860190134431, "rougeL_recall_stderr": 0.0005579442723360266, "rougeLsum_fmeasure": 0.0021605449739656364, "rougeLsum_fmeasure_stderr": 0.000627676699804427, "rougeLsum_precision": 0.002344649755421413, "rougeLsum_precision_stderr": 0.0006798799021959105, "rougeLsum_recall": 0.002067860375134616, "rougeLsum_recall_stderr": 0.0006068459708856913}}}}
|
4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.419099869257768,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.031194484818270595
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.07428309105028663,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0013421912364638756
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.3681872402424481,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005159853930374422
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.11607434147069716,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.001850012647052215
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.03414063617858362,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0007933999992729109
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.17816632883864172,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0035533786923895483
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.053578321986389954,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.001133087024919444
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.07011599452669921,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0012215073828548383
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.34670303763093524,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004708835524319321
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.10966698573493393,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.001691639710103682
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.0710048733913006,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0012664792510428985
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.3511669157710232,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.004777230946733221
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.11093711988762583,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0017419311094458187
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_2.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.49148117932803576,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.03324507460068721
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.07902449045214965,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0014400750722415926
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.38710322053697205,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005007184653985026
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.12352694244894348,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.001906067502643356
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.036715281213131654,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0009607779535126237
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.19100291871237257,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0035992814997918465
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.05758229272088689,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0012545799402892376
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.07447122807071402,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0013245130142878934
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.36501755504076894,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004584459311617146
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.11654156016524217,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0017494100681024593
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.07548416619016371,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0013763939300205914
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.36910030227270246,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0046609020187487835
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.1179785006841591,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.001814093430670857
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 2,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_3.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.5454434527628271,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.022000087258590196
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.07951071501680998,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0013359456205039861
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.3995230958506392,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005034816997358078
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.12498856968844978,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0018098054094220577
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.03707635426895073,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0008167570156064652
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.1992034939045538,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.0036422113394633904
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.05860558987165317,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0011419392844985948
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.07471295258744902,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0012223182423721426
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.3744384042521063,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004568576393272832
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.11746888068908964,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0016549198819411505
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.07594762490697103,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0012672858502526187
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.3811393237968741,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0047038769161714305
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.11935873168692221,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0017154967515797509
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 3,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_4.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.556791168026395,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.032317222532058425
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.080998817707546,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0013339367163834135
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.41001072494996027,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005030415658363669
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.12736411777875145,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0017854249994775197
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.03778277803591952,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0007985595270354018
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.20751135104918955,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.003724857487616047
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.05987151579611729,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0011284434076196772
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.07546065667342094,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0011740175946367535
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.3832768865547933,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004588049116216556
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.11884367913833037,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0015910790592394222
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.0770058183518017,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.001258314779905608
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.3899354662021343,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.0046962894721850395
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.12102961800817194,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0016712189432626576
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 4,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed3/evaluation/generation/slim.4b284b17bc4seed3_GEM-web_nlg_en_PALM_prompt_5.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "GEM/web_nlg_en",
|
5 |
+
"prompt_name": "PALM_prompt",
|
6 |
+
"bleu": 0.6622000128005519,
|
7 |
+
"dataset_path": "GEM/web_nlg",
|
8 |
+
"dataset_name": "en",
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.04388394326637253
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "GEM/web_nlg_en",
|
14 |
+
"prompt_name": "PALM_prompt",
|
15 |
+
"rouge1_precision": 0.0828504135671057,
|
16 |
+
"dataset_path": "GEM/web_nlg",
|
17 |
+
"dataset_name": "en",
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0013617141050727788
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "GEM/web_nlg_en",
|
23 |
+
"prompt_name": "PALM_prompt",
|
24 |
+
"rouge1_recall": 0.4253413823950648,
|
25 |
+
"dataset_path": "GEM/web_nlg",
|
26 |
+
"dataset_name": "en",
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.005183982393059217
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "GEM/web_nlg_en",
|
32 |
+
"prompt_name": "PALM_prompt",
|
33 |
+
"rouge1_fmeasure": 0.13062641947274276,
|
34 |
+
"dataset_path": "GEM/web_nlg",
|
35 |
+
"dataset_name": "en",
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0018599113152054116
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "GEM/web_nlg_en",
|
41 |
+
"prompt_name": "PALM_prompt",
|
42 |
+
"rouge2_precision": 0.03910444837960125,
|
43 |
+
"dataset_path": "GEM/web_nlg",
|
44 |
+
"dataset_name": "en",
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0008204438440226741
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "GEM/web_nlg_en",
|
50 |
+
"prompt_name": "PALM_prompt",
|
51 |
+
"rouge2_recall": 0.21879925159555091,
|
52 |
+
"dataset_path": "GEM/web_nlg",
|
53 |
+
"dataset_name": "en",
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.003830311541729702
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "GEM/web_nlg_en",
|
59 |
+
"prompt_name": "PALM_prompt",
|
60 |
+
"rouge2_fmeasure": 0.06215772688428125,
|
61 |
+
"dataset_path": "GEM/web_nlg",
|
62 |
+
"dataset_name": "en",
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0011664907997072857
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "GEM/web_nlg_en",
|
68 |
+
"prompt_name": "PALM_prompt",
|
69 |
+
"rougeL_precision": 0.07664370158738154,
|
70 |
+
"dataset_path": "GEM/web_nlg",
|
71 |
+
"dataset_name": "en",
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0012087093820958138
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "GEM/web_nlg_en",
|
77 |
+
"prompt_name": "PALM_prompt",
|
78 |
+
"rougeL_recall": 0.3931352132690314,
|
79 |
+
"dataset_path": "GEM/web_nlg",
|
80 |
+
"dataset_name": "en",
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.004667042410814549
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "GEM/web_nlg_en",
|
86 |
+
"prompt_name": "PALM_prompt",
|
87 |
+
"rougeL_fmeasure": 0.12085376540315042,
|
88 |
+
"dataset_path": "GEM/web_nlg",
|
89 |
+
"dataset_name": "en",
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0016483232519714076
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "GEM/web_nlg_en",
|
95 |
+
"prompt_name": "PALM_prompt",
|
96 |
+
"rougeLsum_precision": 0.0784559405601766,
|
97 |
+
"dataset_path": "GEM/web_nlg",
|
98 |
+
"dataset_name": "en",
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.001278094865391385
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "GEM/web_nlg_en",
|
104 |
+
"prompt_name": "PALM_prompt",
|
105 |
+
"rougeLsum_recall": 0.4021636336590835,
|
106 |
+
"dataset_path": "GEM/web_nlg",
|
107 |
+
"dataset_name": "en",
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.004801298542115264
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "GEM/web_nlg_en",
|
113 |
+
"prompt_name": "PALM_prompt",
|
114 |
+
"rougeLsum_fmeasure": 0.12367283564282297,
|
115 |
+
"dataset_path": "GEM/web_nlg",
|
116 |
+
"dataset_name": "en",
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.001741836937013877
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed3/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 5,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|