Muennighoff committed on
Commit
16a575b
1 Parent(s): a915ab8
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
  2. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
  3. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.json +1 -0
  4. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.json +1 -0
  5. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.json +1 -0
  6. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.json +1 -0
  7. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
  8. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
  9. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.json +1 -0
  10. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.json +1 -0
  11. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.json +1 -0
  12. evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.json +1 -0
  13. evaluation/generation/agg.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_4.json +1 -0
  14. evaluation/generation/agg.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.json +1 -0
  15. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
  16. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
  17. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.jsonl +3 -0
  18. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.jsonl +3 -0
  19. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.jsonl +3 -0
  20. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.jsonl +3 -0
  21. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
  22. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.jsonl +3 -0
  23. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.jsonl +3 -0
  24. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.jsonl +3 -0
  25. evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.jsonl +3 -0
  26. evaluation/generation/examples.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.jsonl +3 -0
  27. evaluation/generation/merged.csv +53 -0
  28. evaluation/generation/merged.json +1 -0
  29. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
  30. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
  31. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.json +133 -0
  32. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.json +133 -0
  33. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.json +133 -0
  34. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.json +133 -0
  35. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json +133 -0
  36. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json +133 -0
  37. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.json +133 -0
  38. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.json +133 -0
  39. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.json +133 -0
  40. evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.json +133 -0
  41. evaluation/generation/slim.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_4.json +133 -0
  42. evaluation/generation/slim.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.json +133 -0
  43. evaluation/rankeval/lm1-2b8-55b-oscarroots_0.csv +21 -0
  44. evaluation/rankeval/lm1-2b8-55b-oscarroots_0.json +56 -1
  45. evaluation/rankeval/lm1-2b8-55b-oscarroots_0_lm-eval_global_step52452_2023-02-25-11-16-27_0shots_backup.json +0 -32
  46. evaluation/rankeval/lm1-2b8-55b-oscarroots_1.csv +21 -0
  47. evaluation/rankeval/lm1-2b8-55b-oscarroots_1.json +56 -1
  48. evaluation/rankeval/lm1-2b8-55b-oscarroots_1_lm-eval_global_step52452_2023-02-25-11-18-29_1shots_backup.json +0 -32
  49. evaluation/rankeval/lm1-2b8-55b-oscarroots_2.csv +21 -0
  50. evaluation/rankeval/lm1-2b8-55b-oscarroots_2.json +56 -1
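The trailing `_0` … `_5` in the generation filenames is the few-shot setting; it matches the `num_fewshot` value recorded in each file's `config` block in the hunks below. As an illustration only, the following Python sketch flattens the per-shot `agg.*.json` files into a single table of metric rows. The paths and output filename are assumptions; this is not claimed to be the script that produced `evaluation/generation/merged.csv`.

```python
import csv
import glob
import json

# Sketch: collect (task, prompt, metric, value, stderr, shots) rows from the
# aggregated result files added in this commit. Paths are hypothetical.
rows = []
for path in sorted(glob.glob("evaluation/generation/agg.*.json")):
    with open(path) as f:
        data = json.load(f)
    shots = data["config"]["num_fewshot"]
    for res in data["results"]:
        for key, value in res.items():
            # Numeric fields are the metrics (bleu, rouge*_precision, ...);
            # skip booleans and the *_stderr companions, which are attached below.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                continue
            if key.endswith("_stderr"):
                continue
            rows.append({
                "task": res["task_name"],
                "prompt": res["prompt_name"],
                "metric": key,
                "value": value,
                "stderr": res.get(f"{key}_stderr"),
                "shots": shots,
            })

with open("agg_summary.csv", "w", newline="") as f:
    writer = csv.DictWriter(
        f, fieldnames=["task", "prompt", "metric", "value", "stderr", "shots"]
    )
    writer.writeheader()
    writer.writerows(rows)
```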
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2562657244023514, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.028344931968085282}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.062384701456087446, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020959572066061704}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2778238258177499, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0053586090146941585}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08917647320322222, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001969587935532424}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026957871325524504, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00119791534895436}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.12120656565208492, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003245668709085193}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03885973727074715, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011411233576479184}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.059948665447718395, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001993918360545074}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2696777384075072, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005223094399514082}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08588431201368854, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018539010477647254}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05914169795430463, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020593373696654797}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.25838316242026105, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00491849430291812}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08392617035294098, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001881634203344931}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
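Each `agg` file ends with a `config` block recording how the run was launched: the `hf-causal` wrapper, the `model_args` string pointing at the converted checkpoint and tokenizer, `num_fewshot`, batch size, device, the 3000-example limit, and the bootstrap/seed settings. A minimal sketch, assuming the path below, for confirming that the shot count in the filename agrees with the recorded config:

```python
import json
import re

# Hypothetical consistency check: the "_<n>.json" suffix of an agg file should
# match the "num_fewshot" value stored in its config block.
path = "evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json"
with open(path) as f:
    cfg = json.load(f)["config"]

shots_in_name = int(re.search(r"_(\d+)\.json$", path).group(1))
assert shots_in_name == cfg["num_fewshot"], (shots_in_name, cfg["num_fewshot"])
print(cfg["model"], cfg["num_fewshot"], cfg["batch_size"])
```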
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.44906636509622533, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.039450588630595015}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.11288061517490074, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00386994874457578}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.27968139569340955, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004818798730073509}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.13316786961188862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032035615287408364}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.052903771126418274, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0024560083140397187}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.13670929059101208, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0032752111527625427}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.0635687916821055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020746952988998694}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.10294903334837356, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0034785078085129637}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.26443193955147987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004508721603501755}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.12246686808714284, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0027865982212275067}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.1043770278231783, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0035484629358203635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2648398336113232, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004503000799632382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12355055054182587, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002841014595626855}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.4687852352128821, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.037809750600987985}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.12939788800380767, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004056238000462917}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3205721940394599, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004784451352989951}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.15485025651871712, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0034302178802564512}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06272684866048131, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002451752863709562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16040449198937512, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003451151403396264}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.07545945954698206, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021937365311268525}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.11538953087200472, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00346589668028597}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.30175228398138576, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004432821529779081}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.14062442945039813, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0028845693981807787}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.11767324801231895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0035688465184641432}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3033452610052125, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004447592173686305}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.14250354572951518, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002959772587239683}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5387522714694768, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.023700066593108533}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.14028377513050538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004341036446200867}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.33808480225227105, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004840897586860907}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1660401631118107, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0037496181550606754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07249658632299814, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029199126351436076}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1738659001719329, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003647562909324833}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08448456167004659, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002619780755266116}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.12529450659786737, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003740919506421422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3182618579312013, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004525730253384199}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15093871662748365, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0032225789949202046}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.12866365606890057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003897701131855631}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3209266755398485, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004550196873941636}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.15379137375695537, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0033296076273151513}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6154728892401495, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05492052153457257}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.1416216780254008, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004403560474088744}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.350369181874358, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004821260815960307}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.16918194985742865, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003726050287797417}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.07377427634131609, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029769913238928973}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.18193245830782417, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003657633632618109}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.08639654396535042, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0025466510150414677}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.12691040102952247, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0038030608678551996}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33016396642156515, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004483672398798779}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.15419230913013102, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0031966205747524113}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.1301329876623986, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003962229410505262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3326898264482343, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004514627367420683}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.15686161901707832, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003303387308503824}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6280080728696126, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04207890973166607}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.15509132478375567, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004705843597864089}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.36072408074578627, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004932090632111422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1785781279253249, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003864561091208021}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.08180998073756966, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0031797028106942695}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1895120629191651, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0037580265385154963}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.09234282351104058, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0026995310439716595}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.13813720543380648, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004053616600136467}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33922496409575775, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004622505715509403}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.1620344936907299, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003324512248892316}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.14168764741861828, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004239364453934911}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34078189394441194, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004625313010789047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.16446961325308998, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0034245269422962608}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
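Each of these agg.*.json files has the same layout: a "results" list with one entry per metric (the value plus a matching *_stderr field) and a shared "config" block recording the run settings (hf-causal model, bfloat16, num_fewshot, batch_size 16, limit 3000, bootstrap_iters 10, seed 1234). Below is a minimal sketch of how the per-metric values could be pulled out of one of these files once it is checked out locally; the path is an assumption and any of the aggregate files added here works the same way.

```python
import json

# Assumed local path to one of the aggregate files added in this commit.
path = "evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.json"

with open(path) as f:
    agg = json.load(f)

# Every entry in "results" carries exactly one metric value plus its *_stderr twin.
for entry in agg["results"]:
    for key, value in entry.items():
        if isinstance(value, float) and not key.endswith("_stderr"):
            stderr = entry.get(f"{key}_stderr", float("nan"))
            print(f'{entry["task_name"]} | {entry["prompt_name"]} | {key} = {value:.4f} (+/- {stderr:.4f})')

# Shared run settings live under "config".
print(agg["config"]["num_fewshot"], agg["config"]["batch_size"], agg["config"]["limit"])
```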
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.14831832571904127, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020031574591708096}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.23669379639579727, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002619263402445374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.16843326604027012, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018787988149176356}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.027495279137204204, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008008675778483577}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04558631579219217, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013034624565288761}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03124497623150672, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000826359540629872}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11944058601706234, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014613554843883904}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.197662087047554, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022098401272736727}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1373698337101501, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014061340796650912}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.13662513471708013, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001834224913870075}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.21848967035350617, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00242500324850602}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15530559329563356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017277092690194619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.4416513225274035, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07150581722616807}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2474989333475308, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004192775409890903}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.16589702996724112, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002477684211999076}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1595930665634592, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020260063889951897}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.06456439217534583, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026036519415425876}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.03395563608028862, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011867665512589468}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03442090774378983, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011197085684525982}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.2007100384592662, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0036402244436710527}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12968060330920403, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019100623389590725}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.12543184498335516, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015786460435038327}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.2356687196157363, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004040097148447038}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.15692818247747958, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023149233432171795}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15113313197846198, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019006022698345565}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.9112541973999004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08163120654641273}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.3671552578777415, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00464310641179873}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.18316424412100443, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002534818661881543}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.20013093911819188, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021449192871676984}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.11652986149569292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033165278944766326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.048557963351484004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012897480876968325}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.05444986552431719, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012835369876590637}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.3030100733810117, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004160446544180668}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1454519391726235, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020102539956042787}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1600577177279773, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017194821563251488}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.34948494261958846, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00452044298551405}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.17241597783925564, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023681037127358745}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18899973850835908, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020281161914398288}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.4920293719291493, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06820740373159184}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.31853023306817924, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005101746577027814}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14722792301810253, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026467115286204013}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.16433308908585867, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002402134739956357}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.10260882729326853, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033488070271773295}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.04037648707007591, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012615029724490608}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04583954485462417, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012897030660356447}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.26690611784240603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004530048771512807}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11874183155575303, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021251987841602225}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13364146877613045, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00195553742839865}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.3046492013868888, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004949401564936648}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1393491071946238, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024976587731928646}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.15587618164528091, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002273493914941302}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.6055821821151304, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06565825928935355}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10914047578759525, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004168163234199949}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04875414041264775, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0020018963702338544}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.05455257649733116, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002012404693346845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.037261877113724175, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002437336913811512}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.013448412424643126, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008658300088105147}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01577378906241384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009385589100462621}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.09325777757588789, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003680574837942309}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03991105511182912, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016279952050045603}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.045100032510421204, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016666399875103265}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10390247551362326, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004006559051889402}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.04597031630948619, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0018861168846922784}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.051422475813845084, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018931617381586926}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.006932560692974423, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0017990239865701577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.019024182319545703, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019567530452185085}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.00809080049652358, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0008930277662354756}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.009250948371284507, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0009365636508956353}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.007878162042298584, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012119853940782482}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0024981499124714754, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00036249889299460995}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0031062476201456015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00043823448085888057}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.016781942931569222, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017964368358031513}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.006703878314586593, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007330971808896851}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.007784924334651645, "fixed_answer_choice_list": null, 
"dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007922903839285351}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.01855296243342967, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001923182050974051}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.00783323994370169, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008689584836341853}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.008943947340808445, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009064353361518755}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.5486400435190646e-20, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 5.928974165985541e-18}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.06536803730199292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0040080446277792475}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0562578647719299, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003466710519052521}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.05638135159346031, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0033862796262482922}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.013453387419618108, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014082914388032289}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.012484556808642255, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012859869239756115}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01218356666574454, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012325430366165824}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.049695294130091966, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0031067522139507655}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04191724945748147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026229390757436582}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.042197213021247744, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": 
"", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0025684294584781297}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0501407378152416, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00312422462775062}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.04244935232602974, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026544789695008624}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.042656324273577836, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002590374442922286}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.114133025618453, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03902166340342278}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/agg.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 
0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da240232ef9803594f89287674f0f363b3ee085ab5ada3f7ba1fcf42c74238a9
+ size 4174292
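The examples.*.jsonl files are stored through Git LFS, so the diff only shows the three-line pointer (spec version, sha256 object id, byte size) rather than the generations themselves; fetching the real content needs the LFS objects (for example via `git lfs pull`). A tiny sketch, assuming a checkout where the pointer file has not yet been smudged, of reading those three fields:

```python
# Hypothetical helper: read the key/value lines of a Git LFS pointer file.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer(
    "evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl"
)
print(ptr["oid"], ptr["size"])  # sha256:da24... 4174292
```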
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1792872b8360c67aa01618d992d596079f04524a1f1503f34e0c5a8ea05d498
+ size 4762910
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b9f81e6fa3c44ef15ccfd5e9a1b3a9c9e0ca5f9d010e2e5a1f113c7e000edae
+ size 5711084
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6815796d6e2d668597f9433d5131bd8c3111fe44288ea40c86c6e7ce22c2c5a
+ size 6596539
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0bfb2896e5849621f3f7a89e6828f03535322465ea21e99c1dc62eee20af2496
+ size 7512274
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6857bff28723014fc359741f07d82b77ccad062bda89d8b023ae9517dfde3048
+ size 8376795
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c6cae8b73c1ba64dd6f8a95d50808d0f96c3442588e0ee7c521e666397beb40
+ size 12958324
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa0eff950ed43d1c253f2b1a3f259895bbffa3bee9d2b6502468fe7c3de050b8
+ size 18431550
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7974ff13a24110559f516d7f81ffc6072c020bc498ceca28c4e7640bfc930e9e
+ size 23921835
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b312eade5aa25054bd7f12b3e19c5d7ea6499e17edc996b6caa4bdcfca0d531a
+ size 29333993
evaluation/generation/examples.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26e70db861ffdbfc8cf88d452d03a5b56b13a4b70c3a538eb70524271e3f7804
+ size 34778872
evaluation/generation/examples.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.jsonl CHANGED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65d4d0227bd4d26f3e9a99d734f3d030328c20d626c6f922920b3dc155977f38
+ size 13896009
evaluation/generation/merged.csv ADDED
@@ -0,0 +1,53 @@
+ dataset,fewshots,prompt,metric,value
+ e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.07031535968691954
+ e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.07031535968691954
+ e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.21317325143407617
+ e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.21317325143407617
+ e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.23375071908985015
+ e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.23375071908985015
+ e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.238646665315093
+ e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.238646665315093
+ e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.23937430569965518
+ e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.23937430569965518
+ e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.23628094628078694
+ e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.23628094628078694
+ e2e_nlg_cleaned,5,average,multiple,0.20525687458439684
+ gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.02780714664643612
+ gem_xsum,0,median,rouge2_fmeasure,0.02780714664643612
+ gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.04500583650450346
+ gem_xsum,1,median,rouge2_fmeasure,0.04500583650450346
+ gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.051882326943507785
+ gem_xsum,2,median,rouge2_fmeasure,0.051882326943507785
+ gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.05187808695380671
+ gem_xsum,3,median,rouge2_fmeasure,0.05187808695380671
+ gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.01218356666574454
+ gem_xsum,4,median,rouge2_fmeasure,0.01218356666574454
+ gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0
+ gem_xsum,5,median,rouge2_fmeasure,0.0
+ gem_xsum,5,average,multiple,0.031459493952333106
+ web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.03885973727074715
+ web_nlg_en,0,median,rouge2_fmeasure,0.03885973727074715
+ web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.0635687916821055
+ web_nlg_en,1,median,rouge2_fmeasure,0.0635687916821055
+ web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.07545945954698206
+ web_nlg_en,2,median,rouge2_fmeasure,0.07545945954698206
+ web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.08448456167004659
+ web_nlg_en,3,median,rouge2_fmeasure,0.08448456167004659
+ web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.08639654396535042
+ web_nlg_en,4,median,rouge2_fmeasure,0.08639654396535042
+ web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.09234282351104058
+ web_nlg_en,5,median,rouge2_fmeasure,0.09234282351104058
+ web_nlg_en,5,average,multiple,0.07351865294104538
+ wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.03124497623150672
+ wiki_lingua_en,0,median,rouge2_fmeasure,0.03124497623150672
+ wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.03442090774378983
+ wiki_lingua_en,1,median,rouge2_fmeasure,0.03442090774378983
+ wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.05444986552431719
+ wiki_lingua_en,2,median,rouge2_fmeasure,0.05444986552431719
+ wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.04583954485462417
48
+ wiki_lingua_en,3,median,rouge2_fmeasure,0.04583954485462417
49
+ wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.01577378906241384
50
+ wiki_lingua_en,4,median,rouge2_fmeasure,0.01577378906241384
51
+ wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0031062476201456015
52
+ wiki_lingua_en,5,median,rouge2_fmeasure,0.0031062476201456015
53
+ wiki_lingua_en,5,average,multiple,0.030805888506132893
evaluation/generation/merged.json ADDED
@@ -0,0 +1 @@
+ {"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.2562657244023514, "bleu_stderr": 0.028344931968085282, "rouge1_fmeasure": 0.08917647320322222, "rouge1_fmeasure_stderr": 0.001969587935532424, "rouge1_precision": 0.062384701456087446, "rouge1_precision_stderr": 0.0020959572066061704, "rouge1_recall": 0.2778238258177499, "rouge1_recall_stderr": 0.0053586090146941585, "rouge2_fmeasure": 0.03885973727074715, "rouge2_fmeasure_stderr": 0.0011411233576479184, "rouge2_precision": 0.026957871325524504, "rouge2_precision_stderr": 0.00119791534895436, "rouge2_recall": 0.12120656565208492, "rouge2_recall_stderr": 0.003245668709085193, "rougeL_fmeasure": 0.08588431201368854, "rougeL_fmeasure_stderr": 0.0018539010477647254, "rougeL_precision": 0.059948665447718395, "rougeL_precision_stderr": 0.001993918360545074, "rougeL_recall": 0.2696777384075072, "rougeL_recall_stderr": 0.005223094399514082, "rougeLsum_fmeasure": 0.08392617035294098, "rougeLsum_fmeasure_stderr": 0.001881634203344931, "rougeLsum_precision": 0.05914169795430463, "rougeLsum_precision_stderr": 0.0020593373696654797, "rougeLsum_recall": 0.25838316242026105, "rougeLsum_recall_stderr": 0.00491849430291812}}, "1": {"PALM_prompt": {"bleu": 0.44906636509622533, "bleu_stderr": 0.039450588630595015, "rouge1_fmeasure": 0.13316786961188862, "rouge1_fmeasure_stderr": 0.0032035615287408364, "rouge1_precision": 0.11288061517490074, "rouge1_precision_stderr": 0.00386994874457578, "rouge1_recall": 0.27968139569340955, "rouge1_recall_stderr": 0.004818798730073509, "rouge2_fmeasure": 0.0635687916821055, "rouge2_fmeasure_stderr": 0.0020746952988998694, "rouge2_precision": 0.052903771126418274, "rouge2_precision_stderr": 0.0024560083140397187, "rouge2_recall": 0.13670929059101208, "rouge2_recall_stderr": 0.0032752111527625427, "rougeL_fmeasure": 0.12246686808714284, "rougeL_fmeasure_stderr": 0.0027865982212275067, "rougeL_precision": 0.10294903334837356, "rougeL_precision_stderr": 0.0034785078085129637, "rougeL_recall": 0.26443193955147987, "rougeL_recall_stderr": 0.004508721603501755, "rougeLsum_fmeasure": 0.12355055054182587, "rougeLsum_fmeasure_stderr": 0.002841014595626855, "rougeLsum_precision": 0.1043770278231783, "rougeLsum_precision_stderr": 0.0035484629358203635, "rougeLsum_recall": 0.2648398336113232, "rougeLsum_recall_stderr": 0.004503000799632382}}, "2": {"PALM_prompt": {"bleu": 0.4687852352128821, "bleu_stderr": 0.037809750600987985, "rouge1_fmeasure": 0.15485025651871712, "rouge1_fmeasure_stderr": 0.0034302178802564512, "rouge1_precision": 0.12939788800380767, "rouge1_precision_stderr": 0.004056238000462917, "rouge1_recall": 0.3205721940394599, "rouge1_recall_stderr": 0.004784451352989951, "rouge2_fmeasure": 0.07545945954698206, "rouge2_fmeasure_stderr": 0.0021937365311268525, "rouge2_precision": 0.06272684866048131, "rouge2_precision_stderr": 0.002451752863709562, "rouge2_recall": 0.16040449198937512, "rouge2_recall_stderr": 0.003451151403396264, "rougeL_fmeasure": 0.14062442945039813, "rougeL_fmeasure_stderr": 0.0028845693981807787, "rougeL_precision": 0.11538953087200472, "rougeL_precision_stderr": 0.00346589668028597, "rougeL_recall": 0.30175228398138576, "rougeL_recall_stderr": 0.004432821529779081, "rougeLsum_fmeasure": 0.14250354572951518, "rougeLsum_fmeasure_stderr": 0.002959772587239683, "rougeLsum_precision": 0.11767324801231895, "rougeLsum_precision_stderr": 0.0035688465184641432, "rougeLsum_recall": 0.3033452610052125, "rougeLsum_recall_stderr": 0.004447592173686305}}, "3": {"PALM_prompt": {"bleu": 0.5387522714694768, 
"bleu_stderr": 0.023700066593108533, "rouge1_fmeasure": 0.1660401631118107, "rouge1_fmeasure_stderr": 0.0037496181550606754, "rouge1_precision": 0.14028377513050538, "rouge1_precision_stderr": 0.004341036446200867, "rouge1_recall": 0.33808480225227105, "rouge1_recall_stderr": 0.004840897586860907, "rouge2_fmeasure": 0.08448456167004659, "rouge2_fmeasure_stderr": 0.002619780755266116, "rouge2_precision": 0.07249658632299814, "rouge2_precision_stderr": 0.0029199126351436076, "rouge2_recall": 0.1738659001719329, "rouge2_recall_stderr": 0.003647562909324833, "rougeL_fmeasure": 0.15093871662748365, "rougeL_fmeasure_stderr": 0.0032225789949202046, "rougeL_precision": 0.12529450659786737, "rougeL_precision_stderr": 0.003740919506421422, "rougeL_recall": 0.3182618579312013, "rougeL_recall_stderr": 0.004525730253384199, "rougeLsum_fmeasure": 0.15379137375695537, "rougeLsum_fmeasure_stderr": 0.0033296076273151513, "rougeLsum_precision": 0.12866365606890057, "rougeLsum_precision_stderr": 0.003897701131855631, "rougeLsum_recall": 0.3209266755398485, "rougeLsum_recall_stderr": 0.004550196873941636}}, "4": {"PALM_prompt": {"bleu": 0.6154728892401495, "bleu_stderr": 0.05492052153457257, "rouge1_fmeasure": 0.16918194985742865, "rouge1_fmeasure_stderr": 0.003726050287797417, "rouge1_precision": 0.1416216780254008, "rouge1_precision_stderr": 0.004403560474088744, "rouge1_recall": 0.350369181874358, "rouge1_recall_stderr": 0.004821260815960307, "rouge2_fmeasure": 0.08639654396535042, "rouge2_fmeasure_stderr": 0.0025466510150414677, "rouge2_precision": 0.07377427634131609, "rouge2_precision_stderr": 0.0029769913238928973, "rouge2_recall": 0.18193245830782417, "rouge2_recall_stderr": 0.003657633632618109, "rougeL_fmeasure": 0.15419230913013102, "rougeL_fmeasure_stderr": 0.0031966205747524113, "rougeL_precision": 0.12691040102952247, "rougeL_precision_stderr": 0.0038030608678551996, "rougeL_recall": 0.33016396642156515, "rougeL_recall_stderr": 0.004483672398798779, "rougeLsum_fmeasure": 0.15686161901707832, "rougeLsum_fmeasure_stderr": 0.003303387308503824, "rougeLsum_precision": 0.1301329876623986, "rougeLsum_precision_stderr": 0.003962229410505262, "rougeLsum_recall": 0.3326898264482343, "rougeLsum_recall_stderr": 0.004514627367420683}}, "5": {"PALM_prompt": {"bleu": 0.6280080728696126, "bleu_stderr": 0.04207890973166607, "rouge1_fmeasure": 0.1785781279253249, "rouge1_fmeasure_stderr": 0.003864561091208021, "rouge1_precision": 0.15509132478375567, "rouge1_precision_stderr": 0.004705843597864089, "rouge1_recall": 0.36072408074578627, "rouge1_recall_stderr": 0.004932090632111422, "rouge2_fmeasure": 0.09234282351104058, "rouge2_fmeasure_stderr": 0.0026995310439716595, "rouge2_precision": 0.08180998073756966, "rouge2_precision_stderr": 0.0031797028106942695, "rouge2_recall": 0.1895120629191651, "rouge2_recall_stderr": 0.0037580265385154963, "rougeL_fmeasure": 0.1620344936907299, "rougeL_fmeasure_stderr": 0.003324512248892316, "rougeL_precision": 0.13813720543380648, "rougeL_precision_stderr": 0.004053616600136467, "rougeL_recall": 0.33922496409575775, "rougeL_recall_stderr": 0.004622505715509403, "rougeLsum_fmeasure": 0.16446961325308998, "rougeLsum_fmeasure_stderr": 0.0034245269422962608, "rougeLsum_precision": 0.14168764741861828, "rougeLsum_precision_stderr": 0.004239364453934911, "rougeLsum_recall": 0.34078189394441194, "rougeLsum_recall_stderr": 0.004625313010789047}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 1.4416513225274035, "bleu_stderr": 0.07150581722616807, "rouge1_fmeasure": 
0.16843326604027012, "rouge1_fmeasure_stderr": 0.0018787988149176356, "rouge1_precision": 0.14831832571904127, "rouge1_precision_stderr": 0.0020031574591708096, "rouge1_recall": 0.23669379639579727, "rouge1_recall_stderr": 0.002619263402445374, "rouge2_fmeasure": 0.03124497623150672, "rouge2_fmeasure_stderr": 0.000826359540629872, "rouge2_precision": 0.027495279137204204, "rouge2_precision_stderr": 0.0008008675778483577, "rouge2_recall": 0.04558631579219217, "rouge2_recall_stderr": 0.0013034624565288761, "rougeL_fmeasure": 0.1373698337101501, "rougeL_fmeasure_stderr": 0.0014061340796650912, "rougeL_precision": 0.11944058601706234, "rougeL_precision_stderr": 0.0014613554843883904, "rougeL_recall": 0.197662087047554, "rougeL_recall_stderr": 0.0022098401272736727, "rougeLsum_fmeasure": 0.15530559329563356, "rougeLsum_fmeasure_stderr": 0.0017277092690194619, "rougeLsum_precision": 0.13662513471708013, "rougeLsum_precision_stderr": 0.001834224913870075, "rougeLsum_recall": 0.21848967035350617, "rougeLsum_recall_stderr": 0.00242500324850602}}, "1": {"tldr_en": {"bleu": 1.9112541973999004, "bleu_stderr": 0.08163120654641273, "rouge1_fmeasure": 0.1595930665634592, "rouge1_fmeasure_stderr": 0.0020260063889951897, "rouge1_precision": 0.2474989333475308, "rouge1_precision_stderr": 0.004192775409890903, "rouge1_recall": 0.16589702996724112, "rouge1_recall_stderr": 0.002477684211999076, "rouge2_fmeasure": 0.03442090774378983, "rouge2_fmeasure_stderr": 0.0011197085684525982, "rouge2_precision": 0.06456439217534583, "rouge2_precision_stderr": 0.0026036519415425876, "rouge2_recall": 0.03395563608028862, "rouge2_recall_stderr": 0.0011867665512589468, "rougeL_fmeasure": 0.12543184498335516, "rougeL_fmeasure_stderr": 0.0015786460435038327, "rougeL_precision": 0.2007100384592662, "rougeL_precision_stderr": 0.0036402244436710527, "rougeL_recall": 0.12968060330920403, "rougeL_recall_stderr": 0.0019100623389590725, "rougeLsum_fmeasure": 0.15113313197846198, "rougeLsum_fmeasure_stderr": 0.0019006022698345565, "rougeLsum_precision": 0.2356687196157363, "rougeLsum_precision_stderr": 0.004040097148447038, "rougeLsum_recall": 0.15692818247747958, "rougeLsum_recall_stderr": 0.0023149233432171795}}, "2": {"tldr_en": {"bleu": 2.4920293719291493, "bleu_stderr": 0.06820740373159184, "rouge1_fmeasure": 0.20013093911819188, "rouge1_fmeasure_stderr": 0.0021449192871676984, "rouge1_precision": 0.3671552578777415, "rouge1_precision_stderr": 0.00464310641179873, "rouge1_recall": 0.18316424412100443, "rouge1_recall_stderr": 0.002534818661881543, "rouge2_fmeasure": 0.05444986552431719, "rouge2_fmeasure_stderr": 0.0012835369876590637, "rouge2_precision": 0.11652986149569292, "rouge2_precision_stderr": 0.0033165278944766326, "rouge2_recall": 0.048557963351484004, "rouge2_recall_stderr": 0.0012897480876968325, "rougeL_fmeasure": 0.1600577177279773, "rougeL_fmeasure_stderr": 0.0017194821563251488, "rougeL_precision": 0.3030100733810117, "rougeL_precision_stderr": 0.004160446544180668, "rougeL_recall": 0.1454519391726235, "rougeL_recall_stderr": 0.0020102539956042787, "rougeLsum_fmeasure": 0.18899973850835908, "rougeLsum_fmeasure_stderr": 0.0020281161914398288, "rougeLsum_precision": 0.34948494261958846, "rougeLsum_precision_stderr": 0.00452044298551405, "rougeLsum_recall": 0.17241597783925564, "rougeLsum_recall_stderr": 0.0023681037127358745}}, "3": {"tldr_en": {"bleu": 1.6055821821151304, "bleu_stderr": 0.06565825928935355, "rouge1_fmeasure": 0.16433308908585867, "rouge1_fmeasure_stderr": 0.002402134739956357, "rouge1_precision": 
0.31853023306817924, "rouge1_precision_stderr": 0.005101746577027814, "rouge1_recall": 0.14722792301810253, "rouge1_recall_stderr": 0.0026467115286204013, "rouge2_fmeasure": 0.04583954485462417, "rouge2_fmeasure_stderr": 0.0012897030660356447, "rouge2_precision": 0.10260882729326853, "rouge2_precision_stderr": 0.0033488070271773295, "rouge2_recall": 0.04037648707007591, "rouge2_recall_stderr": 0.0012615029724490608, "rougeL_fmeasure": 0.13364146877613045, "rougeL_fmeasure_stderr": 0.00195553742839865, "rougeL_precision": 0.26690611784240603, "rougeL_precision_stderr": 0.004530048771512807, "rougeL_recall": 0.11874183155575303, "rougeL_recall_stderr": 0.0021251987841602225, "rougeLsum_fmeasure": 0.15587618164528091, "rougeLsum_fmeasure_stderr": 0.002273493914941302, "rougeLsum_precision": 0.3046492013868888, "rougeLsum_precision_stderr": 0.004949401564936648, "rougeLsum_recall": 0.1393491071946238, "rougeLsum_recall_stderr": 0.0024976587731928646}}, "4": {"tldr_en": {"bleu": 0.006932560692974423, "bleu_stderr": 0.0017990239865701577, "rouge1_fmeasure": 0.05455257649733116, "rouge1_fmeasure_stderr": 0.002012404693346845, "rouge1_precision": 0.10914047578759525, "rouge1_precision_stderr": 0.004168163234199949, "rouge1_recall": 0.04875414041264775, "rouge1_recall_stderr": 0.0020018963702338544, "rouge2_fmeasure": 0.01577378906241384, "rouge2_fmeasure_stderr": 0.0009385589100462621, "rouge2_precision": 0.037261877113724175, "rouge2_precision_stderr": 0.002437336913811512, "rouge2_recall": 0.013448412424643126, "rouge2_recall_stderr": 0.0008658300088105147, "rougeL_fmeasure": 0.045100032510421204, "rougeL_fmeasure_stderr": 0.0016666399875103265, "rougeL_precision": 0.09325777757588789, "rougeL_precision_stderr": 0.003680574837942309, "rougeL_recall": 0.03991105511182912, "rougeL_recall_stderr": 0.0016279952050045603, "rougeLsum_fmeasure": 0.051422475813845084, "rougeLsum_fmeasure_stderr": 0.0018931617381586926, "rougeLsum_precision": 0.10390247551362326, "rougeLsum_precision_stderr": 0.004006559051889402, "rougeLsum_recall": 0.04597031630948619, "rougeLsum_recall_stderr": 0.0018861168846922784}}, "5": {"tldr_en": {"bleu": 1.5486400435190646e-20, "bleu_stderr": 5.928974165985541e-18, "rouge1_fmeasure": 0.009250948371284507, "rouge1_fmeasure_stderr": 0.0009365636508956353, "rouge1_precision": 0.019024182319545703, "rouge1_precision_stderr": 0.0019567530452185085, "rouge1_recall": 0.00809080049652358, "rouge1_recall_stderr": 0.0008930277662354756, "rouge2_fmeasure": 0.0031062476201456015, "rouge2_fmeasure_stderr": 0.00043823448085888057, "rouge2_precision": 0.007878162042298584, "rouge2_precision_stderr": 0.0012119853940782482, "rouge2_recall": 0.0024981499124714754, "rouge2_recall_stderr": 0.00036249889299460995, "rougeL_fmeasure": 0.007784924334651645, "rougeL_fmeasure_stderr": 0.0007922903839285351, "rougeL_precision": 0.016781942931569222, "rougeL_precision_stderr": 0.0017964368358031513, "rougeL_recall": 0.006703878314586593, "rougeL_recall_stderr": 0.0007330971808896851, "rougeLsum_fmeasure": 0.008943947340808445, "rougeLsum_fmeasure_stderr": 0.0009064353361518755, "rougeLsum_precision": 0.01855296243342967, "rougeLsum_precision_stderr": 0.001923182050974051, "rougeLsum_recall": 0.00783323994370169, "rougeLsum_recall_stderr": 0.0008689584836341853}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 3.9626050306721474, "bleu_stderr": 0.06024253979194259, "rouge1_fmeasure": 0.20284230759257002, "rouge1_fmeasure_stderr": 0.0017464348006575089, "rouge1_precision": 
0.1540985111881967, "rouge1_precision_stderr": 0.0014515620097711474, "rouge1_recall": 0.31140262883046665, "rouge1_recall_stderr": 0.0024251988602698067, "rouge2_fmeasure": 0.07031535968691954, "rouge2_fmeasure_stderr": 0.001168196298024764, "rouge2_precision": 0.05355018382472272, "rouge2_precision_stderr": 0.0009106172876128309, "rouge2_recall": 0.10690154622755875, "rouge2_recall_stderr": 0.0017688696499961142, "rougeL_fmeasure": 0.1753585977687643, "rougeL_fmeasure_stderr": 0.0014608296346256319, "rougeL_precision": 0.1327617315507999, "rougeL_precision_stderr": 0.001197611976138011, "rougeL_recall": 0.2711114650902915, "rougeL_recall_stderr": 0.0021100724050297824, "rougeLsum_fmeasure": 0.18282034804175978, "rougeLsum_fmeasure_stderr": 0.0016241979991028408, "rougeLsum_precision": 0.1387916405736721, "rougeLsum_precision_stderr": 0.0013406445609266567, "rougeLsum_recall": 0.2810609872616038, "rougeLsum_recall_stderr": 0.0022804790016154107}}, "1": {"generate_text_restaurant": {"bleu": 11.593827123577368, "bleu_stderr": 0.12407021076125696, "rouge1_fmeasure": 0.45020216196031243, "rouge1_fmeasure_stderr": 0.0023747076071133698, "rouge1_precision": 0.5509122991009537, "rouge1_precision_stderr": 0.0033661250504397264, "rouge1_recall": 0.42140078880413323, "rouge1_recall_stderr": 0.002953747532093766, "rouge2_fmeasure": 0.21317325143407617, "rouge2_fmeasure_stderr": 0.0020127889592610262, "rouge2_precision": 0.2650742428626904, "rouge2_precision_stderr": 0.0027402930606959166, "rouge2_recall": 0.19921132653073137, "rouge2_recall_stderr": 0.0021274437677356846, "rougeL_fmeasure": 0.32668549807617586, "rougeL_fmeasure_stderr": 0.002086318029186934, "rougeL_precision": 0.40311524575775876, "rougeL_precision_stderr": 0.003054424817440509, "rougeL_recall": 0.30479322858663915, "rougeL_recall_stderr": 0.0023980133198678024, "rougeLsum_fmeasure": 0.3686111369512851, "rougeLsum_fmeasure_stderr": 0.0023418416254223887, "rougeLsum_precision": 0.4523945212247963, "rougeLsum_precision_stderr": 0.00326815326036064, "rougeLsum_recall": 0.3446091324717908, "rougeLsum_recall_stderr": 0.0027191831013193444}}, "2": {"generate_text_restaurant": {"bleu": 12.88190096920724, "bleu_stderr": 0.17583922253090617, "rouge1_fmeasure": 0.4721264666575481, "rouge1_fmeasure_stderr": 0.002268547262390229, "rouge1_precision": 0.5822720419906148, "rouge1_precision_stderr": 0.0032962978807536585, "rouge1_recall": 0.4337280880183242, "rouge1_recall_stderr": 0.0028431977686949614, "rouge2_fmeasure": 0.23375071908985015, "rouge2_fmeasure_stderr": 0.0020523859536162883, "rouge2_precision": 0.2931157748619445, "rouge2_precision_stderr": 0.0028165108542885105, "rouge2_recall": 0.2147077046167617, "rouge2_recall_stderr": 0.0021786795843148085, "rougeL_fmeasure": 0.3522716641150115, "rougeL_fmeasure_stderr": 0.0020991364053741914, "rougeL_precision": 0.4369685674136242, "rougeL_precision_stderr": 0.003073517456588823, "rougeL_recall": 0.32297689774894583, "rougeL_recall_stderr": 0.0024097063140581243, "rougeLsum_fmeasure": 0.393682260751447, "rougeLsum_fmeasure_stderr": 0.0023007053568388975, "rougeLsum_precision": 0.4863687819740207, "rougeLsum_precision_stderr": 0.0032507530347318466, "rougeLsum_recall": 0.3613566974986443, "rougeLsum_recall_stderr": 0.0026568660208198115}}, "3": {"generate_text_restaurant": {"bleu": 13.262436270778784, "bleu_stderr": 0.12966668718803287, "rouge1_fmeasure": 0.47525930406845607, "rouge1_fmeasure_stderr": 0.0022663496963600087, "rouge1_precision": 0.5813922477147344, "rouge1_precision_stderr": 
0.0032594641701596022, "rouge1_recall": 0.43750260743630337, "rouge1_recall_stderr": 0.002850951212768957, "rouge2_fmeasure": 0.238646665315093, "rouge2_fmeasure_stderr": 0.0020938647359093243, "rouge2_precision": 0.2963848070406136, "rouge2_precision_stderr": 0.002811561987291426, "rouge2_recall": 0.2198532494536229, "rouge2_recall_stderr": 0.0022399911777690423, "rougeL_fmeasure": 0.3550920935079866, "rougeL_fmeasure_stderr": 0.002168420115905195, "rougeL_precision": 0.43608943087396823, "rougeL_precision_stderr": 0.0030728003407875415, "rougeL_recall": 0.32653357863256616, "rougeL_recall_stderr": 0.002478214580278462, "rougeLsum_fmeasure": 0.3982122870175536, "rougeLsum_fmeasure_stderr": 0.002333698912202139, "rougeLsum_precision": 0.4873959951365985, "rougeLsum_precision_stderr": 0.0032309608357424142, "rougeLsum_recall": 0.3665706405925854, "rougeLsum_recall_stderr": 0.0027071065833861527}}, "4": {"generate_text_restaurant": {"bleu": 13.476662518674425, "bleu_stderr": 0.14804924963101374, "rouge1_fmeasure": 0.4751483029381261, "rouge1_fmeasure_stderr": 0.0022939871226639315, "rouge1_precision": 0.575148309756594, "rouge1_precision_stderr": 0.0032456743499240143, "rouge1_recall": 0.4379553251280355, "rouge1_recall_stderr": 0.0027875515895071923, "rouge2_fmeasure": 0.23937430569965518, "rouge2_fmeasure_stderr": 0.0021223157395609733, "rouge2_precision": 0.2932213385624628, "rouge2_precision_stderr": 0.0027850927611603377, "rouge2_recall": 0.220787156975807, "rouge2_recall_stderr": 0.0022308643708334754, "rougeL_fmeasure": 0.35385483221937813, "rougeL_fmeasure_stderr": 0.0021780339813240314, "rougeL_precision": 0.4297630033921168, "rougeL_precision_stderr": 0.0030250335936550877, "rougeL_recall": 0.3260216890650744, "rougeL_recall_stderr": 0.0024522487516979575, "rougeLsum_fmeasure": 0.3976384682503085, "rougeLsum_fmeasure_stderr": 0.0023729927223769785, "rougeLsum_precision": 0.4813876805218902, "rougeLsum_precision_stderr": 0.0032181540128894265, "rougeLsum_recall": 0.36675888786779215, "rougeLsum_recall_stderr": 0.002699461696539119}}, "5": {"generate_text_restaurant": {"bleu": 13.156897203455948, "bleu_stderr": 0.08630136341873869, "rouge1_fmeasure": 0.4729751649957838, "rouge1_fmeasure_stderr": 0.0022801925622949843, "rouge1_precision": 0.5738627336760422, "rouge1_precision_stderr": 0.0032894505336520264, "rouge1_recall": 0.4353618479598648, "rouge1_recall_stderr": 0.0027767731270488508, "rouge2_fmeasure": 0.23628094628078694, "rouge2_fmeasure_stderr": 0.00211214055033469, "rouge2_precision": 0.29086794940233046, "rouge2_precision_stderr": 0.002818639353471394, "rouge2_recall": 0.21741764823749096, "rouge2_recall_stderr": 0.002221377915423391, "rougeL_fmeasure": 0.3534318578391924, "rougeL_fmeasure_stderr": 0.0021797527155027353, "rougeL_precision": 0.4304412705229436, "rougeL_precision_stderr": 0.003073791049621951, "rougeL_recall": 0.32508975084283026, "rougeL_recall_stderr": 0.0024528402950547467, "rougeLsum_fmeasure": 0.39642244501779056, "rougeLsum_fmeasure_stderr": 0.002354679808047655, "rougeLsum_precision": 0.4815893698288744, "rougeLsum_precision_stderr": 0.0032662227256431515, "rougeLsum_recall": 0.36475434401700674, "rougeLsum_recall_stderr": 0.00266853275926591}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 1.0285249742430023, "bleu_stderr": 0.04658561770880105, "rouge1_fmeasure": 0.1478842341216362, "rouge1_fmeasure_stderr": 0.0027472203051295764, "rouge1_precision": 0.10711262762227787, "rouge1_precision_stderr": 0.0020601915536585967, "rouge1_recall": 
0.25337181483295146, "rouge1_recall_stderr": 0.0046856436696164905, "rouge2_fmeasure": 0.02780714664643612, "rouge2_fmeasure_stderr": 0.0012134748561574466, "rouge2_precision": 0.019781013833534398, "rouge2_precision_stderr": 0.0008722452654034713, "rouge2_recall": 0.04912277748197487, "rouge2_recall_stderr": 0.0021576559260432712, "rougeL_fmeasure": 0.11266163818049864, "rougeL_fmeasure_stderr": 0.0020299599299667603, "rougeL_precision": 0.08153762034631415, "rougeL_precision_stderr": 0.0015205040880351287, "rougeL_recall": 0.19377740656973594, "rougeL_recall_stderr": 0.0035212605780510554, "rougeLsum_fmeasure": 0.11968068511417429, "rougeLsum_fmeasure_stderr": 0.0021748845339722685, "rougeLsum_precision": 0.08657778420332544, "rougeLsum_precision_stderr": 0.0016263154802718866, "rougeLsum_recall": 0.20583552514369016, "rougeLsum_recall_stderr": 0.003771689144281502}}, "1": {"article_DOC_summary": {"bleu": 2.1792875082477123, "bleu_stderr": 0.06319420014636688, "rouge1_fmeasure": 0.2183934186796248, "rouge1_fmeasure_stderr": 0.0031131023395006203, "rouge1_precision": 0.2154872949099143, "rouge1_precision_stderr": 0.0037469904189591395, "rouge1_recall": 0.25867586586469277, "rouge1_recall_stderr": 0.004134512250176692, "rouge2_fmeasure": 0.04500583650450346, "rouge2_fmeasure_stderr": 0.001863892149786922, "rouge2_precision": 0.0447245429308555, "rouge2_precision_stderr": 0.0020714108653434506, "rouge2_recall": 0.05502827731187604, "rouge2_recall_stderr": 0.0023758194054523916, "rougeL_fmeasure": 0.1635144970491249, "rougeL_fmeasure_stderr": 0.0024820064917382306, "rougeL_precision": 0.1614628963956234, "rougeL_precision_stderr": 0.0030120325913654804, "rougeL_recall": 0.19472956411902964, "rougeL_recall_stderr": 0.0033235017592203077, "rougeLsum_fmeasure": 0.1671666603817299, "rougeLsum_fmeasure_stderr": 0.0025413985087851003, "rougeLsum_precision": 0.16439844895006708, "rougeLsum_precision_stderr": 0.003021699034691555, "rougeLsum_recall": 0.2002131667137321, "rougeLsum_recall_stderr": 0.003522860713518912}}, "2": {"article_DOC_summary": {"bleu": 2.9719119603418074, "bleu_stderr": 0.16832095088560176, "rouge1_fmeasure": 0.23579050025596546, "rouge1_fmeasure_stderr": 0.0032208877086972645, "rouge1_precision": 0.24578221078853943, "rouge1_precision_stderr": 0.0038311545433087246, "rouge1_recall": 0.25247232169607386, "rouge1_recall_stderr": 0.003851791759690215, "rouge2_fmeasure": 0.051882326943507785, "rouge2_fmeasure_stderr": 0.002120030804437139, "rouge2_precision": 0.05385976155670831, "rouge2_precision_stderr": 0.002293715583911734, "rouge2_recall": 0.05655894066313464, "rouge2_recall_stderr": 0.002406489537857907, "rougeL_fmeasure": 0.17839101172174274, "rougeL_fmeasure_stderr": 0.0026918945920387577, "rougeL_precision": 0.18601506569279253, "rougeL_precision_stderr": 0.0031620641585856746, "rougeL_recall": 0.19159840411716447, "rougeL_recall_stderr": 0.003222530909275831, "rougeLsum_fmeasure": 0.18050054360244336, "rougeLsum_fmeasure_stderr": 0.002709070917463213, "rougeLsum_precision": 0.18783807621807994, "rougeLsum_precision_stderr": 0.003156835477899629, "rougeLsum_recall": 0.19452321319767416, "rougeLsum_recall_stderr": 0.0033253733370942227}}, "3": {"article_DOC_summary": {"bleu": 3.2041870719788714, "bleu_stderr": 0.20611661341919532, "rouge1_fmeasure": 0.2277409702430376, "rouge1_fmeasure_stderr": 0.0036665816219810165, "rouge1_precision": 0.24477878269919434, "rouge1_precision_stderr": 0.0043304256076557015, "rouge1_recall": 0.23546035253406764, "rouge1_recall_stderr": 
0.0040420090423460295, "rouge2_fmeasure": 0.05187808695380671, "rouge2_fmeasure_stderr": 0.0022736997047107283, "rouge2_precision": 0.05535540959023755, "rouge2_precision_stderr": 0.002520852290920342, "rouge2_recall": 0.05400488265902618, "rouge2_recall_stderr": 0.0023942103295597683, "rougeL_fmeasure": 0.1726765296689622, "rougeL_fmeasure_stderr": 0.0030037675906149446, "rougeL_precision": 0.18676731153569484, "rougeL_precision_stderr": 0.0035994106495257387, "rougeL_recall": 0.17809432971388495, "rougeL_recall_stderr": 0.003241778335625792, "rougeLsum_fmeasure": 0.1742948037879305, "rougeLsum_fmeasure_stderr": 0.0030227160469161974, "rougeLsum_precision": 0.1881533844870454, "rougeLsum_precision_stderr": 0.003601435982381053, "rougeLsum_recall": 0.18040945162861388, "rougeLsum_recall_stderr": 0.0033378628217040685}}, "4": {"article_DOC_summary": {"bleu": 0.114133025618453, "bleu_stderr": 0.03902166340342278, "rouge1_fmeasure": 0.05638135159346031, "rouge1_fmeasure_stderr": 0.0033862796262482922, "rouge1_precision": 0.06536803730199292, "rouge1_precision_stderr": 0.0040080446277792475, "rouge1_recall": 0.0562578647719299, "rouge1_recall_stderr": 0.003466710519052521, "rouge2_fmeasure": 0.01218356666574454, "rouge2_fmeasure_stderr": 0.0012325430366165824, "rouge2_precision": 0.013453387419618108, "rouge2_precision_stderr": 0.0014082914388032289, "rouge2_recall": 0.012484556808642255, "rouge2_recall_stderr": 0.0012859869239756115, "rougeL_fmeasure": 0.042197213021247744, "rougeL_fmeasure_stderr": 0.0025684294584781297, "rougeL_precision": 0.049695294130091966, "rougeL_precision_stderr": 0.0031067522139507655, "rougeL_recall": 0.04191724945748147, "rougeL_recall_stderr": 0.0026229390757436582, "rougeLsum_fmeasure": 0.042656324273577836, "rougeLsum_fmeasure_stderr": 0.002590374442922286, "rougeLsum_precision": 0.0501407378152416, "rougeLsum_precision_stderr": 0.00312422462775062, "rougeLsum_recall": 0.04244935232602974, "rougeLsum_recall_stderr": 0.0026544789695008624}}, "5": {"article_DOC_summary": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0, "rouge1_fmeasure_stderr": 0.0, "rouge1_precision": 0.0, "rouge1_precision_stderr": 0.0, "rouge1_recall": 0.0, "rouge1_recall_stderr": 0.0, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0, "rougeL_fmeasure_stderr": 0.0, "rougeL_precision": 0.0, "rougeL_precision_stderr": 0.0, "rougeL_recall": 0.0, "rougeL_recall_stderr": 0.0, "rougeLsum_fmeasure": 0.0, "rougeLsum_fmeasure_stderr": 0.0, "rougeLsum_precision": 0.0, "rougeLsum_precision_stderr": 0.0, "rougeLsum_recall": 0.0, "rougeLsum_recall_stderr": 0.0}}}}
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.2562657244023514,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.028344931968085282
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.062384701456087446,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.0020959572066061704
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.2778238258177499,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.0053586090146941585
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.08917647320322222,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.001969587935532424
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.026957871325524504,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.00119791534895436
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.12120656565208492,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.003245668709085193
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.03885973727074715,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0011411233576479184
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.059948665447718395,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.001993918360545074
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.2696777384075072,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.005223094399514082
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.08588431201368854,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0018539010477647254
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.05914169795430463,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0020593373696654797
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.25838316242026105,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.00491849430291812
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.08392617035294098,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.001881634203344931
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 0,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.44906636509622533,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.039450588630595015
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.11288061517490074,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.00386994874457578
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.27968139569340955,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004818798730073509
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.13316786961188862,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0032035615287408364
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.052903771126418274,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0024560083140397187
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.13670929059101208,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0032752111527625427
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.0635687916821055,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0020746952988998694
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.10294903334837356,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0034785078085129637
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.26443193955147987,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004508721603501755
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.12246686808714284,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0027865982212275067
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.1043770278231783,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0035484629358203635
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.2648398336113232,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004503000799632382
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.12355055054182587,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.002841014595626855
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 1,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.4687852352128821,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.037809750600987985
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.12939788800380767,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.004056238000462917
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.3205721940394599,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004784451352989951
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.15485025651871712,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0034302178802564512
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.06272684866048131,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.002451752863709562
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.16040449198937512,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.003451151403396264
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.07545945954698206,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0021937365311268525
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.11538953087200472,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.00346589668028597
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.30175228398138576,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004432821529779081
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.14062442945039813,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0028845693981807787
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.11767324801231895,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.0035688465184641432
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.3033452610052125,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004447592173686305
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.14250354572951518,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.002959772587239683
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 2,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.5387522714694768,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.023700066593108533
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.14028377513050538,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.004341036446200867
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.33808480225227105,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004840897586860907
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.1660401631118107,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.0037496181550606754
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.07249658632299814,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0029199126351436076
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.1738659001719329,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.003647562909324833
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.08448456167004659,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.002619780755266116
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.12529450659786737,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.003740919506421422
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.3182618579312013,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004525730253384199
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.15093871662748365,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0032225789949202046
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.12866365606890057,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.003897701131855631
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.3209266755398485,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004550196873941636
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.15379137375695537,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0033296076273151513
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 3,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.6154728892401495,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.05492052153457257
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.1416216780254008,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.004403560474088744
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.350369181874358,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004821260815960307
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.16918194985742865,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.003726050287797417
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.07377427634131609,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0029769913238928973
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.18193245830782417,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.003657633632618109
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.08639654396535042,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0025466510150414677
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.12691040102952247,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.0038030608678551996
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.33016396642156515,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004483672398798779
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.15419230913013102,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.0031966205747524113
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.1301329876623986,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.003962229410505262
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.3326898264482343,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004514627367420683
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.15686161901707832,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.003303387308503824
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/web_nlg_en",
5
+ "prompt_name": "PALM_prompt",
6
+ "bleu": 0.6280080728696126,
7
+ "dataset_path": "GEM/web_nlg",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "bleu_stderr": 0.04207890973166607
11
+ },
12
+ {
13
+ "task_name": "GEM/web_nlg_en",
14
+ "prompt_name": "PALM_prompt",
15
+ "rouge1_precision": 0.15509132478375567,
16
+ "dataset_path": "GEM/web_nlg",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_precision_stderr": 0.004705843597864089
20
+ },
21
+ {
22
+ "task_name": "GEM/web_nlg_en",
23
+ "prompt_name": "PALM_prompt",
24
+ "rouge1_recall": 0.36072408074578627,
25
+ "dataset_path": "GEM/web_nlg",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_recall_stderr": 0.004932090632111422
29
+ },
30
+ {
31
+ "task_name": "GEM/web_nlg_en",
32
+ "prompt_name": "PALM_prompt",
33
+ "rouge1_fmeasure": 0.1785781279253249,
34
+ "dataset_path": "GEM/web_nlg",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge1_fmeasure_stderr": 0.003864561091208021
38
+ },
39
+ {
40
+ "task_name": "GEM/web_nlg_en",
41
+ "prompt_name": "PALM_prompt",
42
+ "rouge2_precision": 0.08180998073756966,
43
+ "dataset_path": "GEM/web_nlg",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_precision_stderr": 0.0031797028106942695
47
+ },
48
+ {
49
+ "task_name": "GEM/web_nlg_en",
50
+ "prompt_name": "PALM_prompt",
51
+ "rouge2_recall": 0.1895120629191651,
52
+ "dataset_path": "GEM/web_nlg",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_recall_stderr": 0.0037580265385154963
56
+ },
57
+ {
58
+ "task_name": "GEM/web_nlg_en",
59
+ "prompt_name": "PALM_prompt",
60
+ "rouge2_fmeasure": 0.09234282351104058,
61
+ "dataset_path": "GEM/web_nlg",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rouge2_fmeasure_stderr": 0.0026995310439716595
65
+ },
66
+ {
67
+ "task_name": "GEM/web_nlg_en",
68
+ "prompt_name": "PALM_prompt",
69
+ "rougeL_precision": 0.13813720543380648,
70
+ "dataset_path": "GEM/web_nlg",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_precision_stderr": 0.004053616600136467
74
+ },
75
+ {
76
+ "task_name": "GEM/web_nlg_en",
77
+ "prompt_name": "PALM_prompt",
78
+ "rougeL_recall": 0.33922496409575775,
79
+ "dataset_path": "GEM/web_nlg",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_recall_stderr": 0.004622505715509403
83
+ },
84
+ {
85
+ "task_name": "GEM/web_nlg_en",
86
+ "prompt_name": "PALM_prompt",
87
+ "rougeL_fmeasure": 0.1620344936907299,
88
+ "dataset_path": "GEM/web_nlg",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeL_fmeasure_stderr": 0.003324512248892316
92
+ },
93
+ {
94
+ "task_name": "GEM/web_nlg_en",
95
+ "prompt_name": "PALM_prompt",
96
+ "rougeLsum_precision": 0.14168764741861828,
97
+ "dataset_path": "GEM/web_nlg",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_precision_stderr": 0.004239364453934911
101
+ },
102
+ {
103
+ "task_name": "GEM/web_nlg_en",
104
+ "prompt_name": "PALM_prompt",
105
+ "rougeLsum_recall": 0.34078189394441194,
106
+ "dataset_path": "GEM/web_nlg",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_recall_stderr": 0.004625313010789047
110
+ },
111
+ {
112
+ "task_name": "GEM/web_nlg_en",
113
+ "prompt_name": "PALM_prompt",
114
+ "rougeLsum_fmeasure": 0.16446961325308998,
115
+ "dataset_path": "GEM/web_nlg",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "rougeLsum_fmeasure_stderr": 0.0034245269422962608
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.14831832571904127,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.0020031574591708096
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.23669379639579727,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.002619263402445374
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.16843326604027012,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.0018787988149176356
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.027495279137204204,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.0008008675778483577
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.04558631579219217,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.0013034624565288761
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.03124497623150672,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.000826359540629872
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.11944058601706234,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.0014613554843883904
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.197662087047554,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0022098401272736727
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.1373698337101501,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0014061340796650912
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.13662513471708013,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.001834224913870075
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.21848967035350617,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.00242500324850602
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.15530559329563356,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0017277092690194619
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 1.4416513225274035,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 0.07150581722616807
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 0,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.2474989333475308,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.004192775409890903
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.16589702996724112,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.002477684211999076
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.1595930665634592,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.0020260063889951897
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.06456439217534583,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.0026036519415425876
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.03395563608028862,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.0011867665512589468
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.03442090774378983,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.0011197085684525982
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.2007100384592662,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.0036402244436710527
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.12968060330920403,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0019100623389590725
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.12543184498335516,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0015786460435038327
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.2356687196157363,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.004040097148447038
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.15692818247747958,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0023149233432171795
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.15113313197846198,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0019006022698345565
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 1.9112541973999004,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 0.08163120654641273
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 1,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.3671552578777415,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.00464310641179873
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.18316424412100443,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.002534818661881543
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.20013093911819188,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.0021449192871676984
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.11652986149569292,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.0033165278944766326
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.048557963351484004,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.0012897480876968325
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.05444986552431719,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.0012835369876590637
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.3030100733810117,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.004160446544180668
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.1454519391726235,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0020102539956042787
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.1600577177279773,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0017194821563251488
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.34948494261958846,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.00452044298551405
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.17241597783925564,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0023681037127358745
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.18899973850835908,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0020281161914398288
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 2.4920293719291493,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 0.06820740373159184
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 2,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.31853023306817924,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.005101746577027814
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.14722792301810253,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.0026467115286204013
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.16433308908585867,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.002402134739956357
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.10260882729326853,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.0033488070271773295
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.04037648707007591,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.0012615029724490608
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.04583954485462417,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.0012897030660356447
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.26690611784240603,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.004530048771512807
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.11874183155575303,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0021251987841602225
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.13364146877613045,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.00195553742839865
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.3046492013868888,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.004949401564936648
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.1393491071946238,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0024976587731928646
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.15587618164528091,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.002273493914941302
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 1.6055821821151304,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 0.06565825928935355
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 3,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.10914047578759525,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.004168163234199949
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.04875414041264775,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.0020018963702338544
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.05455257649733116,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.002012404693346845
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.037261877113724175,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.002437336913811512
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.013448412424643126,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.0008658300088105147
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.01577378906241384,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.0009385589100462621
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.09325777757588789,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.003680574837942309
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.03991105511182912,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0016279952050045603
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.045100032510421204,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0016666399875103265
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.10390247551362326,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.004006559051889402
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.04597031630948619,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0018861168846922784
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.051422475813845084,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0018931617381586926
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 0.006932560692974423,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 0.0017990239865701577
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "GEM/wiki_lingua_en",
5
+ "prompt_name": "tldr_en",
6
+ "rouge1_precision": 0.019024182319545703,
7
+ "dataset_path": "GEM/wiki_lingua",
8
+ "dataset_name": "en",
9
+ "subset": null,
10
+ "rouge1_precision_stderr": 0.0019567530452185085
11
+ },
12
+ {
13
+ "task_name": "GEM/wiki_lingua_en",
14
+ "prompt_name": "tldr_en",
15
+ "rouge1_recall": 0.00809080049652358,
16
+ "dataset_path": "GEM/wiki_lingua",
17
+ "dataset_name": "en",
18
+ "subset": null,
19
+ "rouge1_recall_stderr": 0.0008930277662354756
20
+ },
21
+ {
22
+ "task_name": "GEM/wiki_lingua_en",
23
+ "prompt_name": "tldr_en",
24
+ "rouge1_fmeasure": 0.009250948371284507,
25
+ "dataset_path": "GEM/wiki_lingua",
26
+ "dataset_name": "en",
27
+ "subset": null,
28
+ "rouge1_fmeasure_stderr": 0.0009365636508956353
29
+ },
30
+ {
31
+ "task_name": "GEM/wiki_lingua_en",
32
+ "prompt_name": "tldr_en",
33
+ "rouge2_precision": 0.007878162042298584,
34
+ "dataset_path": "GEM/wiki_lingua",
35
+ "dataset_name": "en",
36
+ "subset": null,
37
+ "rouge2_precision_stderr": 0.0012119853940782482
38
+ },
39
+ {
40
+ "task_name": "GEM/wiki_lingua_en",
41
+ "prompt_name": "tldr_en",
42
+ "rouge2_recall": 0.0024981499124714754,
43
+ "dataset_path": "GEM/wiki_lingua",
44
+ "dataset_name": "en",
45
+ "subset": null,
46
+ "rouge2_recall_stderr": 0.00036249889299460995
47
+ },
48
+ {
49
+ "task_name": "GEM/wiki_lingua_en",
50
+ "prompt_name": "tldr_en",
51
+ "rouge2_fmeasure": 0.0031062476201456015,
52
+ "dataset_path": "GEM/wiki_lingua",
53
+ "dataset_name": "en",
54
+ "subset": null,
55
+ "rouge2_fmeasure_stderr": 0.00043823448085888057
56
+ },
57
+ {
58
+ "task_name": "GEM/wiki_lingua_en",
59
+ "prompt_name": "tldr_en",
60
+ "rougeL_precision": 0.016781942931569222,
61
+ "dataset_path": "GEM/wiki_lingua",
62
+ "dataset_name": "en",
63
+ "subset": null,
64
+ "rougeL_precision_stderr": 0.0017964368358031513
65
+ },
66
+ {
67
+ "task_name": "GEM/wiki_lingua_en",
68
+ "prompt_name": "tldr_en",
69
+ "rougeL_recall": 0.006703878314586593,
70
+ "dataset_path": "GEM/wiki_lingua",
71
+ "dataset_name": "en",
72
+ "subset": null,
73
+ "rougeL_recall_stderr": 0.0007330971808896851
74
+ },
75
+ {
76
+ "task_name": "GEM/wiki_lingua_en",
77
+ "prompt_name": "tldr_en",
78
+ "rougeL_fmeasure": 0.007784924334651645,
79
+ "dataset_path": "GEM/wiki_lingua",
80
+ "dataset_name": "en",
81
+ "subset": null,
82
+ "rougeL_fmeasure_stderr": 0.0007922903839285351
83
+ },
84
+ {
85
+ "task_name": "GEM/wiki_lingua_en",
86
+ "prompt_name": "tldr_en",
87
+ "rougeLsum_precision": 0.01855296243342967,
88
+ "dataset_path": "GEM/wiki_lingua",
89
+ "dataset_name": "en",
90
+ "subset": null,
91
+ "rougeLsum_precision_stderr": 0.001923182050974051
92
+ },
93
+ {
94
+ "task_name": "GEM/wiki_lingua_en",
95
+ "prompt_name": "tldr_en",
96
+ "rougeLsum_recall": 0.00783323994370169,
97
+ "dataset_path": "GEM/wiki_lingua",
98
+ "dataset_name": "en",
99
+ "subset": null,
100
+ "rougeLsum_recall_stderr": 0.0008689584836341853
101
+ },
102
+ {
103
+ "task_name": "GEM/wiki_lingua_en",
104
+ "prompt_name": "tldr_en",
105
+ "rougeLsum_fmeasure": 0.008943947340808445,
106
+ "dataset_path": "GEM/wiki_lingua",
107
+ "dataset_name": "en",
108
+ "subset": null,
109
+ "rougeLsum_fmeasure_stderr": 0.0009064353361518755
110
+ },
111
+ {
112
+ "task_name": "GEM/wiki_lingua_en",
113
+ "prompt_name": "tldr_en",
114
+ "bleu": 1.5486400435190646e-20,
115
+ "dataset_path": "GEM/wiki_lingua",
116
+ "dataset_name": "en",
117
+ "subset": null,
118
+ "bleu_stderr": 5.928974165985541e-18
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_4.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.06536803730199292,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0040080446277792475
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.0562578647719299,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.003466710519052521
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.05638135159346031,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.0033862796262482922
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.013453387419618108,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0014082914388032289
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.012484556808642255,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.0012859869239756115
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.01218356666574454,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0012325430366165824
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.049695294130091966,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0031067522139507655
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.04191724945748147,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.0026229390757436582
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.042197213021247744,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0025684294584781297
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.0501407378152416,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.00312422462775062
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.04244935232602974,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.0026544789695008624
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.042656324273577836,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.002590374442922286
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 0.114133025618453,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.03902166340342278
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 4,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/generation/slim.lm1-2b8-55b-oscarroots_gem_xsum_article_DOC_summary_5.json ADDED
@@ -0,0 +1,133 @@
1
+ {
2
+ "results": [
3
+ {
4
+ "task_name": "gem_xsum",
5
+ "prompt_name": "article_DOC_summary",
6
+ "rouge1_precision": 0.0,
7
+ "dataset_path": "GEM/xsum",
8
+ "dataset_name": null,
9
+ "subset": "",
10
+ "rouge1_precision_stderr": 0.0
11
+ },
12
+ {
13
+ "task_name": "gem_xsum",
14
+ "prompt_name": "article_DOC_summary",
15
+ "rouge1_recall": 0.0,
16
+ "dataset_path": "GEM/xsum",
17
+ "dataset_name": null,
18
+ "subset": "",
19
+ "rouge1_recall_stderr": 0.0
20
+ },
21
+ {
22
+ "task_name": "gem_xsum",
23
+ "prompt_name": "article_DOC_summary",
24
+ "rouge1_fmeasure": 0.0,
25
+ "dataset_path": "GEM/xsum",
26
+ "dataset_name": null,
27
+ "subset": "",
28
+ "rouge1_fmeasure_stderr": 0.0
29
+ },
30
+ {
31
+ "task_name": "gem_xsum",
32
+ "prompt_name": "article_DOC_summary",
33
+ "rouge2_precision": 0.0,
34
+ "dataset_path": "GEM/xsum",
35
+ "dataset_name": null,
36
+ "subset": "",
37
+ "rouge2_precision_stderr": 0.0
38
+ },
39
+ {
40
+ "task_name": "gem_xsum",
41
+ "prompt_name": "article_DOC_summary",
42
+ "rouge2_recall": 0.0,
43
+ "dataset_path": "GEM/xsum",
44
+ "dataset_name": null,
45
+ "subset": "",
46
+ "rouge2_recall_stderr": 0.0
47
+ },
48
+ {
49
+ "task_name": "gem_xsum",
50
+ "prompt_name": "article_DOC_summary",
51
+ "rouge2_fmeasure": 0.0,
52
+ "dataset_path": "GEM/xsum",
53
+ "dataset_name": null,
54
+ "subset": "",
55
+ "rouge2_fmeasure_stderr": 0.0
56
+ },
57
+ {
58
+ "task_name": "gem_xsum",
59
+ "prompt_name": "article_DOC_summary",
60
+ "rougeL_precision": 0.0,
61
+ "dataset_path": "GEM/xsum",
62
+ "dataset_name": null,
63
+ "subset": "",
64
+ "rougeL_precision_stderr": 0.0
65
+ },
66
+ {
67
+ "task_name": "gem_xsum",
68
+ "prompt_name": "article_DOC_summary",
69
+ "rougeL_recall": 0.0,
70
+ "dataset_path": "GEM/xsum",
71
+ "dataset_name": null,
72
+ "subset": "",
73
+ "rougeL_recall_stderr": 0.0
74
+ },
75
+ {
76
+ "task_name": "gem_xsum",
77
+ "prompt_name": "article_DOC_summary",
78
+ "rougeL_fmeasure": 0.0,
79
+ "dataset_path": "GEM/xsum",
80
+ "dataset_name": null,
81
+ "subset": "",
82
+ "rougeL_fmeasure_stderr": 0.0
83
+ },
84
+ {
85
+ "task_name": "gem_xsum",
86
+ "prompt_name": "article_DOC_summary",
87
+ "rougeLsum_precision": 0.0,
88
+ "dataset_path": "GEM/xsum",
89
+ "dataset_name": null,
90
+ "subset": "",
91
+ "rougeLsum_precision_stderr": 0.0
92
+ },
93
+ {
94
+ "task_name": "gem_xsum",
95
+ "prompt_name": "article_DOC_summary",
96
+ "rougeLsum_recall": 0.0,
97
+ "dataset_path": "GEM/xsum",
98
+ "dataset_name": null,
99
+ "subset": "",
100
+ "rougeLsum_recall_stderr": 0.0
101
+ },
102
+ {
103
+ "task_name": "gem_xsum",
104
+ "prompt_name": "article_DOC_summary",
105
+ "rougeLsum_fmeasure": 0.0,
106
+ "dataset_path": "GEM/xsum",
107
+ "dataset_name": null,
108
+ "subset": "",
109
+ "rougeLsum_fmeasure_stderr": 0.0
110
+ },
111
+ {
112
+ "task_name": "gem_xsum",
113
+ "prompt_name": "article_DOC_summary",
114
+ "bleu": 0.0,
115
+ "dataset_path": "GEM/xsum",
116
+ "dataset_name": null,
117
+ "subset": "",
118
+ "bleu_stderr": 0.0
119
+ }
120
+ ],
121
+ "config": {
122
+ "model": "hf-causal",
123
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
124
+ "task_args": "",
125
+ "num_fewshot": 5,
126
+ "batch_size": 16,
127
+ "device": "cuda",
128
+ "use_cache": false,
129
+ "limit": 3000,
130
+ "bootstrap_iters": 10,
131
+ "seed": 1234
132
+ }
133
+ }
evaluation/rankeval/lm1-2b8-55b-oscarroots_0.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.322,0.014782913600996673,0
+ anli_r2,acc,0.331,0.014888272588203936,0
+ anli_r3,acc,0.3358333333333333,0.013639261190932887,0
+ arc_challenge,acc,0.2380546075085324,0.012445770028026208,0
+ arc_challenge,acc_norm,0.2627986348122867,0.012862523175351335,0
+ arc_easy,acc,0.5513468013468014,0.010205540414612862,0
+ arc_easy,acc_norm,0.49326599326599324,0.010258852980991825,0
+ boolq,acc,0.5850152905198777,0.008617716361921567,1
+ cb,acc,0.35714285714285715,0.0646095738380922,1
+ cb,f1,0.24888576120103215,,1
+ copa,acc,0.68,0.046882617226215034,0
+ hellaswag,acc,0.38169687313284206,0.004848099661619686,0
+ hellaswag,acc_norm,0.47689703246365267,0.004984452002563925,0
+ piqa,acc,0.7121871599564744,0.01056325038305919,0
+ piqa,acc_norm,0.7094668117519043,0.010592765034696534,0
+ rte,acc,0.5270758122743683,0.030052303463143706,0
+ sciq,acc,0.804,0.012559527926707378,0
+ sciq,acc_norm,0.722,0.014174516461485247,0
+ storycloze_2016,acc,0.6654195617316943,0.01091131896712794,0
+ winogrande,acc,0.5138121546961326,0.014047122916440415,0
evaluation/rankeval/lm1-2b8-55b-oscarroots_0.json CHANGED
@@ -20,6 +20,52 @@
  "copa": {
  "acc": 0.68,
  "acc_stderr": 0.046882617226215034
+ },
+ "hellaswag": {
+ "acc": 0.38169687313284206,
+ "acc_stderr": 0.004848099661619686,
+ "acc_norm": 0.47689703246365267,
+ "acc_norm_stderr": 0.004984452002563925
+ },
+ "rte": {
+ "acc": 0.5270758122743683,
+ "acc_stderr": 0.030052303463143706
+ },
+ "winogrande": {
+ "acc": 0.5138121546961326,
+ "acc_stderr": 0.014047122916440415
+ },
+ "storycloze_2016": {
+ "acc": 0.6654195617316943,
+ "acc_stderr": 0.01091131896712794
+ },
+ "boolq": {
+ "acc": 0.5850152905198777,
+ "acc_stderr": 0.008617716361921567
+ },
+ "arc_easy": {
+ "acc": 0.5513468013468014,
+ "acc_stderr": 0.010205540414612862,
+ "acc_norm": 0.49326599326599324,
+ "acc_norm_stderr": 0.010258852980991825
+ },
+ "arc_challenge": {
+ "acc": 0.2380546075085324,
+ "acc_stderr": 0.012445770028026208,
+ "acc_norm": 0.2627986348122867,
+ "acc_norm_stderr": 0.012862523175351335
+ },
+ "sciq": {
+ "acc": 0.804,
+ "acc_stderr": 0.012559527926707378,
+ "acc_norm": 0.722,
+ "acc_norm_stderr": 0.014174516461485247
+ },
+ "piqa": {
+ "acc": 0.7121871599564744,
+ "acc_stderr": 0.01056325038305919,
+ "acc_norm": 0.7094668117519043,
+ "acc_norm_stderr": 0.010592765034696534
  }
  },
  "versions": {
@@ -27,6 +73,15 @@
  "anli_r2": 0,
  "anli_r3": 0,
  "cb": 1,
- "copa": 0
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
  }
  }
evaluation/rankeval/lm1-2b8-55b-oscarroots_0_lm-eval_global_step52452_2023-02-25-11-16-27_0shots_backup.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "results": {
3
- "anli_r1": {
4
- "acc": 0.322,
5
- "acc_stderr": 0.014782913600996673
6
- },
7
- "anli_r2": {
8
- "acc": 0.331,
9
- "acc_stderr": 0.014888272588203936
10
- },
11
- "anli_r3": {
12
- "acc": 0.3358333333333333,
13
- "acc_stderr": 0.013639261190932887
14
- },
15
- "cb": {
16
- "acc": 0.35714285714285715,
17
- "acc_stderr": 0.0646095738380922,
18
- "f1": 0.24888576120103215
19
- },
20
- "copa": {
21
- "acc": 0.68,
22
- "acc_stderr": 0.046882617226215034
23
- }
24
- },
25
- "versions": {
26
- "anli_r1": 0,
27
- "anli_r2": 0,
28
- "anli_r3": 0,
29
- "cb": 1,
30
- "copa": 0
31
- }
32
- }
 
evaluation/rankeval/lm1-2b8-55b-oscarroots_1.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.303,0.014539683710535255,0
+ anli_r2,acc,0.319,0.014746404865473486,0
+ anli_r3,acc,0.33416666666666667,0.013622434813136781,0
+ arc_challenge,acc,0.2431740614334471,0.012536554144587092,0
+ arc_challenge,acc_norm,0.28071672354948807,0.013131238126975583,0
+ arc_easy,acc,0.5631313131313131,0.010177672928157681,0
+ arc_easy,acc_norm,0.5273569023569024,0.010244415164390529,0
+ boolq,acc,0.5813455657492355,0.008628545022868554,1
+ cb,acc,0.48214285714285715,0.06737697508644648,1
+ cb,f1,0.3270348837209302,,1
+ copa,acc,0.66,0.04760952285695237,0
+ hellaswag,acc,0.3815972913762199,0.004847857546957478,0
+ hellaswag,acc_norm,0.477096195976897,0.004984543540932335,0
+ piqa,acc,0.7078346028291621,0.010610252174513658,0
+ piqa,acc_norm,0.6996735582154516,0.010695225308183145,0
+ rte,acc,0.5306859205776173,0.03003973059219781,0
+ sciq,acc,0.871,0.010605256784796565,0
+ sciq,acc_norm,0.861,0.010945263761042963,0
+ storycloze_2016,acc,0.655264564404062,0.01099083028205749,0
+ winogrande,acc,0.5414364640883977,0.0140041468537919,0
evaluation/rankeval/lm1-2b8-55b-oscarroots_1.json CHANGED
@@ -20,6 +20,52 @@
  "copa": {
  "acc": 0.66,
  "acc_stderr": 0.04760952285695237
+ },
+ "hellaswag": {
+ "acc": 0.3815972913762199,
+ "acc_stderr": 0.004847857546957478,
+ "acc_norm": 0.477096195976897,
+ "acc_norm_stderr": 0.004984543540932335
+ },
+ "rte": {
+ "acc": 0.5306859205776173,
+ "acc_stderr": 0.03003973059219781
+ },
+ "winogrande": {
+ "acc": 0.5414364640883977,
+ "acc_stderr": 0.0140041468537919
+ },
+ "storycloze_2016": {
+ "acc": 0.655264564404062,
+ "acc_stderr": 0.01099083028205749
+ },
+ "boolq": {
+ "acc": 0.5813455657492355,
+ "acc_stderr": 0.008628545022868554
+ },
+ "arc_easy": {
+ "acc": 0.5631313131313131,
+ "acc_stderr": 0.010177672928157681,
+ "acc_norm": 0.5273569023569024,
+ "acc_norm_stderr": 0.010244415164390529
+ },
+ "arc_challenge": {
+ "acc": 0.2431740614334471,
+ "acc_stderr": 0.012536554144587092,
+ "acc_norm": 0.28071672354948807,
+ "acc_norm_stderr": 0.013131238126975583
+ },
+ "sciq": {
+ "acc": 0.871,
+ "acc_stderr": 0.010605256784796565,
+ "acc_norm": 0.861,
+ "acc_norm_stderr": 0.010945263761042963
+ },
+ "piqa": {
+ "acc": 0.7078346028291621,
+ "acc_stderr": 0.010610252174513658,
+ "acc_norm": 0.6996735582154516,
+ "acc_norm_stderr": 0.010695225308183145
  }
  },
  "versions": {
@@ -27,6 +73,15 @@
  "anli_r2": 0,
  "anli_r3": 0,
  "cb": 1,
- "copa": 0
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
  }
  }
evaluation/rankeval/lm1-2b8-55b-oscarroots_1_lm-eval_global_step52452_2023-02-25-11-18-29_1shots_backup.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "results": {
3
- "anli_r1": {
4
- "acc": 0.303,
5
- "acc_stderr": 0.014539683710535255
6
- },
7
- "anli_r2": {
8
- "acc": 0.319,
9
- "acc_stderr": 0.014746404865473486
10
- },
11
- "anli_r3": {
12
- "acc": 0.33416666666666667,
13
- "acc_stderr": 0.013622434813136781
14
- },
15
- "cb": {
16
- "acc": 0.48214285714285715,
17
- "acc_stderr": 0.06737697508644648,
18
- "f1": 0.3270348837209302
19
- },
20
- "copa": {
21
- "acc": 0.66,
22
- "acc_stderr": 0.04760952285695237
23
- }
24
- },
25
- "versions": {
26
- "anli_r1": 0,
27
- "anli_r2": 0,
28
- "anli_r3": 0,
29
- "cb": 1,
30
- "copa": 0
31
- }
32
- }
evaluation/rankeval/lm1-2b8-55b-oscarroots_2.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.311,0.014645596385722694,0
+ anli_r2,acc,0.346,0.01505026612756444,0
+ anli_r3,acc,0.325,0.013526454480351014,0
+ arc_challenge,acc,0.24573378839590443,0.012581033453730114,0
+ arc_challenge,acc_norm,0.27986348122866894,0.013119040897725922,0
+ arc_easy,acc,0.5744949494949495,0.010145271182591021,0
+ arc_easy,acc_norm,0.5471380471380471,0.010214087372211392,0
+ boolq,acc,0.5629969418960244,0.008675365793227084,1
+ cb,acc,0.4107142857142857,0.0663363415035954,1
+ cb,f1,0.26927814732692784,,1
+ copa,acc,0.68,0.04688261722621505,0
+ hellaswag,acc,0.38149770961959767,0.00484761521647344,0
+ hellaswag,acc_norm,0.4757020513841864,0.004983886091690525,0
+ piqa,acc,0.7154515778019587,0.010527218464130605,0
+ piqa,acc_norm,0.7105549510337323,0.01058101474067561,0
+ rte,acc,0.5379061371841155,0.03000984891252912,0
+ sciq,acc,0.885,0.010093407594904628,0
+ sciq,acc_norm,0.88,0.010281328012747384,0
+ storycloze_2016,acc,0.6547300908605024,0.010994860223187675,0
+ winogrande,acc,0.5272296764009471,0.014031631629827696,0
evaluation/rankeval/lm1-2b8-55b-oscarroots_2.json CHANGED
@@ -20,6 +20,52 @@
  "copa": {
  "acc": 0.68,
  "acc_stderr": 0.04688261722621505
+ },
+ "hellaswag": {
+ "acc": 0.38149770961959767,
+ "acc_stderr": 0.00484761521647344,
+ "acc_norm": 0.4757020513841864,
+ "acc_norm_stderr": 0.004983886091690525
+ },
+ "rte": {
+ "acc": 0.5379061371841155,
+ "acc_stderr": 0.03000984891252912
+ },
+ "winogrande": {
+ "acc": 0.5272296764009471,
+ "acc_stderr": 0.014031631629827696
+ },
+ "storycloze_2016": {
+ "acc": 0.6547300908605024,
+ "acc_stderr": 0.010994860223187675
+ },
+ "boolq": {
+ "acc": 0.5629969418960244,
+ "acc_stderr": 0.008675365793227084
+ },
+ "arc_easy": {
+ "acc": 0.5744949494949495,
+ "acc_stderr": 0.010145271182591021,
+ "acc_norm": 0.5471380471380471,
+ "acc_norm_stderr": 0.010214087372211392
+ },
+ "arc_challenge": {
+ "acc": 0.24573378839590443,
+ "acc_stderr": 0.012581033453730114,
+ "acc_norm": 0.27986348122866894,
+ "acc_norm_stderr": 0.013119040897725922
+ },
+ "sciq": {
+ "acc": 0.885,
+ "acc_stderr": 0.010093407594904628,
+ "acc_norm": 0.88,
+ "acc_norm_stderr": 0.010281328012747384
+ },
+ "piqa": {
+ "acc": 0.7154515778019587,
+ "acc_stderr": 0.010527218464130605,
+ "acc_norm": 0.7105549510337323,
+ "acc_norm_stderr": 0.01058101474067561
  }
  },
  "versions": {
@@ -27,6 +73,15 @@
  "anli_r2": 0,
  "anli_r3": 0,
  "cb": 1,
- "copa": 0
+ "copa": 0,
+ "hellaswag": 0,
+ "rte": 0,
+ "winogrande": 0,
+ "storycloze_2016": 0,
+ "boolq": 1,
+ "arc_easy": 0,
+ "arc_challenge": 0,
+ "sciq": 0,
+ "piqa": 0
  }
  }