Commit 4fdcb9f by Muennighoff
1 Parent(s): f1295b6
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +1 -0
  2. evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +1 -0
  3. evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +1 -0
  4. evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +1 -0
  5. evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +1 -0
  6. evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl +0 -0
  7. evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl +3 -0
  8. evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl +0 -0
  9. evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl +0 -0
  10. evaluation/merged.csv +5 -0
  11. evaluation/merged.json +1 -0
  12. evaluation/nodups-results_lm-eval_global_step52452_2023-01-21-08-16-27.csv +21 -0
  13. evaluation/nodups-results_lm-eval_global_step52452_2023-01-21-08-16-27.json +87 -0
  14. evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +133 -0
  15. evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +133 -0
  16. evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +133 -0
  17. evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json +133 -0
  18. global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  19. global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
  20. global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
  21. global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
  22. global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
  23. global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
  24. global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
  25. global_step52452/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
  26. global_step52452/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
  27. global_step52452/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
  28. global_step52452/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
  29. global_step52452/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  30. global_step52452/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
  31. global_step52452/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
  32. global_step52452/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
  33. global_step52452/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
  34. global_step52452/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
  35. global_step52452/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
  36. global_step52452/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
  37. global_step52452/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt +3 -0
  38. global_step52452/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt +3 -0
  39. global_step52452/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt +3 -0
  40. global_step52452/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  41. global_step52452/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt +3 -0
  42. global_step52452/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt +3 -0
  43. global_step52452/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt +3 -0
  44. global_step52452/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt +3 -0
  45. global_step52452/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt +3 -0
  46. global_step52452/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt +3 -0
  47. global_step52452/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt +3 -0
  48. global_step52452/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt +3 -0
  49. global_step52452/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt +3 -0
  50. global_step52452/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt +3 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.28029328196894115, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02480184024920872}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06329690360390738, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001387935330246365}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.31421418416573627, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004791161650456045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09746309685284041, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016849621638654459}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.027137503163609716, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007651343564527364}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1401257111559608, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003241586954388128}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04223874039871612, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001058848391939766}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05932466454492388, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012690344678080382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.29457496075642914, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004394633326456139}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0915089018847029, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00153588410518572}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.060080846441887166, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013314718589465604}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.29569333412156357, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004414337835118563}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09243599692729336, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016105636568568666}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.18474112904164633, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002267407181269061}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.27650424470884233, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027689275938439025}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.20094467016617557, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019315648928512489}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.04427928011450033, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010900420762454247}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06774300806074234, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015532556924402066}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04775726799786922, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009924171917991007}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1403989637840109, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017027888022148028}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.21498569513652452, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002236435710308492}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1533878305154272, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001403955983191417}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.17171619714033193, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021119378832536255}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2581270017082987, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026139033957512238}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18704146707991842, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017997359690437693}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.3426766870517794, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06168807200033554}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 6.208120134028529, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1090255908153869}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.31731355835055003, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004099274504817032}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.3324854292034225, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036680738174009278}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2919082916343828, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003144827416065499}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1359171818938097, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0024425949074616932}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.13681320433598393, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021042970297826253}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12033860836317081, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018255484904436405}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.24305871613826696, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00324135869732354}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2577033439038558, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029570712013500815}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.22283049594155988, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0023764897834232346}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.2640833510596156, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00363828067497133}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.273011704389814, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0031687493131647084}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2404191146065513, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002734721442980669}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/agg.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1 @@
+ {"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.11158867971698037, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016342679910338715}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.27563712740580626, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003703133787971269}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1568899973617697, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002189141052983194}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.018936864961514067, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000818057289793505}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.04792744421310345, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020733476414230333}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.026811270052396363, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011483653848396338}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09051718542621195, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012785710611048916}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.22518909744083276, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030139050451273614}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12746226399797717, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017205425134826698}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08996878492074459, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013092110765833764}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.22428273464986798, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0031314258481570214}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12677328059989124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017734421929659534}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9955908989230836, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07407336954067296}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3b5c89a24483406f8cb7dfcd91df292385e66a61722a94a5d34c3ea27cb42a6
+ size 13234336
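The three lines above are a standard Git LFS pointer (spec v1): the 13 MB JSONL itself lives in LFS storage and only this stub is committed. A minimal sketch of reading such a pointer file, assuming it has not already been smudged into the real content by a git-lfs checkout:

```python
def parse_lfs_pointer(path):
    # A Git LFS pointer is a tiny text file of "key value" lines:
    # version, oid (e.g. "sha256:<hex>"), and size in bytes.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups."
    "task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16."
    "seed=1234.timestamp=2023-01-20T22:42:14.jsonl"
)
print(ptr["oid"], int(ptr["size"]))  # sha256:d3b5c8... 13234336
```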
evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation/examples.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
evaluation/merged.csv ADDED
@@ -0,0 +1,5 @@
+ dataset,prompt,metric,value
+ e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.12033860836317081
+ gem_xsum,article_DOC_summary,rouge2_fmeasure,0.026811270052396363
+ web_nlg_en,PALM_prompt,rouge2_fmeasure,0.04223874039871612
+ wiki_lingua_en,tldr_en,rouge2_fmeasure,0.04775726799786922
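merged.csv collects one headline metric per generative task: the rouge2_fmeasure from each agg file above. A sketch of how such a merge could be reproduced from the agg JSONs (the glob pattern and key names are inferred from the files in this commit, not taken from the original merge script):

```python
import csv
import glob
import json

rows = []
for path in glob.glob("evaluation/agg.*.json"):
    with open(path) as f:
        data = json.load(f)
    # Each entry in "results" carries exactly one metric plus its stderr;
    # pick out the rouge2_fmeasure entries.
    for result in data["results"]:
        if "rouge2_fmeasure" in result:
            rows.append({
                "dataset": result["task_name"].split("/")[-1],  # GEM/web_nlg_en -> web_nlg_en
                "prompt": result["prompt_name"],
                "metric": "rouge2_fmeasure",
                "value": result["rouge2_fmeasure"],
            })

with open("evaluation/merged.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["dataset", "prompt", "metric", "value"])
    writer.writeheader()
    writer.writerows(sorted(rows, key=lambda r: r["dataset"]))
```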
evaluation/merged.json ADDED
@@ -0,0 +1 @@
+ {"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.28029328196894115, "bleu_stderr": 0.02480184024920872, "rouge1_fmeasure": 0.09746309685284041, "rouge1_fmeasure_stderr": 0.0016849621638654459, "rouge1_precision": 0.06329690360390738, "rouge1_precision_stderr": 0.001387935330246365, "rouge1_recall": 0.31421418416573627, "rouge1_recall_stderr": 0.004791161650456045, "rouge2_fmeasure": 0.04223874039871612, "rouge2_fmeasure_stderr": 0.001058848391939766, "rouge2_precision": 0.027137503163609716, "rouge2_precision_stderr": 0.0007651343564527364, "rouge2_recall": 0.1401257111559608, "rouge2_recall_stderr": 0.003241586954388128, "rougeL_fmeasure": 0.0915089018847029, "rougeL_fmeasure_stderr": 0.00153588410518572, "rougeL_precision": 0.05932466454492388, "rougeL_precision_stderr": 0.0012690344678080382, "rougeL_recall": 0.29457496075642914, "rougeL_recall_stderr": 0.004394633326456139, "rougeLsum_fmeasure": 0.09243599692729336, "rougeLsum_fmeasure_stderr": 0.0016105636568568666, "rougeLsum_precision": 0.060080846441887166, "rougeLsum_precision_stderr": 0.0013314718589465604, "rougeLsum_recall": 0.29569333412156357, "rougeLsum_recall_stderr": 0.004414337835118563}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 2.3426766870517794, "bleu_stderr": 0.06168807200033554, "rouge1_fmeasure": 0.20094467016617557, "rouge1_fmeasure_stderr": 0.0019315648928512489, "rouge1_precision": 0.18474112904164633, "rouge1_precision_stderr": 0.002267407181269061, "rouge1_recall": 0.27650424470884233, "rouge1_recall_stderr": 0.0027689275938439025, "rouge2_fmeasure": 0.04775726799786922, "rouge2_fmeasure_stderr": 0.0009924171917991007, "rouge2_precision": 0.04427928011450033, "rouge2_precision_stderr": 0.0010900420762454247, "rouge2_recall": 0.06774300806074234, "rouge2_recall_stderr": 0.0015532556924402066, "rougeL_fmeasure": 0.1533878305154272, "rougeL_fmeasure_stderr": 0.001403955983191417, "rougeL_precision": 0.1403989637840109, "rougeL_precision_stderr": 0.0017027888022148028, "rougeL_recall": 0.21498569513652452, "rougeL_recall_stderr": 0.002236435710308492, "rougeLsum_fmeasure": 0.18704146707991842, "rougeLsum_fmeasure_stderr": 0.0017997359690437693, "rougeLsum_precision": 0.17171619714033193, "rougeLsum_precision_stderr": 0.0021119378832536255, "rougeLsum_recall": 0.2581270017082987, "rougeLsum_recall_stderr": 0.0026139033957512238}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 6.208120134028529, "bleu_stderr": 0.1090255908153869, "rouge1_fmeasure": 0.2919082916343828, "rouge1_fmeasure_stderr": 0.003144827416065499, "rouge1_precision": 0.31731355835055003, "rouge1_precision_stderr": 0.004099274504817032, "rouge1_recall": 0.3324854292034225, "rouge1_recall_stderr": 0.0036680738174009278, "rouge2_fmeasure": 0.12033860836317081, "rouge2_fmeasure_stderr": 0.0018255484904436405, "rouge2_precision": 0.1359171818938097, "rouge2_precision_stderr": 0.0024425949074616932, "rouge2_recall": 0.13681320433598393, "rouge2_recall_stderr": 0.0021042970297826253, "rougeL_fmeasure": 0.22283049594155988, "rougeL_fmeasure_stderr": 0.0023764897834232346, "rougeL_precision": 0.24305871613826696, "rougeL_precision_stderr": 0.00324135869732354, "rougeL_recall": 0.2577033439038558, "rougeL_recall_stderr": 0.0029570712013500815, "rougeLsum_fmeasure": 0.2404191146065513, "rougeLsum_fmeasure_stderr": 0.002734721442980669, "rougeLsum_precision": 0.2640833510596156, "rougeLsum_precision_stderr": 0.00363828067497133, "rougeLsum_recall": 0.273011704389814, "rougeLsum_recall_stderr": 0.0031687493131647084}}, "gem_xsum": 
{"article_DOC_summary": {"bleu": 0.9955908989230836, "bleu_stderr": 0.07407336954067296, "rouge1_fmeasure": 0.1568899973617697, "rouge1_fmeasure_stderr": 0.002189141052983194, "rouge1_precision": 0.11158867971698037, "rouge1_precision_stderr": 0.0016342679910338715, "rouge1_recall": 0.27563712740580626, "rouge1_recall_stderr": 0.003703133787971269, "rouge2_fmeasure": 0.026811270052396363, "rouge2_fmeasure_stderr": 0.0011483653848396338, "rouge2_precision": 0.018936864961514067, "rouge2_precision_stderr": 0.000818057289793505, "rouge2_recall": 0.04792744421310345, "rouge2_recall_stderr": 0.0020733476414230333, "rougeL_fmeasure": 0.12746226399797717, "rougeL_fmeasure_stderr": 0.0017205425134826698, "rougeL_precision": 0.09051718542621195, "rougeL_precision_stderr": 0.0012785710611048916, "rougeL_recall": 0.22518909744083276, "rougeL_recall_stderr": 0.0030139050451273614, "rougeLsum_fmeasure": 0.12677328059989124, "rougeLsum_fmeasure_stderr": 0.0017734421929659534, "rougeLsum_precision": 0.08996878492074459, "rougeLsum_precision_stderr": 0.0013092110765833764, "rougeLsum_recall": 0.22428273464986798, "rougeLsum_recall_stderr": 0.0031314258481570214}}}
evaluation/nodups-results_lm-eval_global_step52452_2023-01-21-08-16-27.csv ADDED
@@ -0,0 +1,21 @@
+ task,metric,value,err,version
+ anli_r1,acc,0.332,0.014899597242811483,0
+ anli_r2,acc,0.337,0.014955087918653607,0
+ anli_r3,acc,0.3516666666666667,0.013789711695404792,0
+ arc_challenge,acc,0.24829351535836178,0.012624912868089762,0
+ arc_challenge,acc_norm,0.2713310580204778,0.012993807727545796,0
+ arc_easy,acc,0.5589225589225589,0.010188293221040569,0
+ arc_easy,acc_norm,0.494949494949495,0.010259260102565853,0
+ boolq,acc,0.5425076452599389,0.00871339478784262,1
+ cb,acc,0.30357142857142855,0.06199938655510753,1
+ cb,f1,0.2584656084656085,,1
+ copa,acc,0.79,0.040936018074033256,0
+ hellaswag,acc,0.4444333798048198,0.004958872288442143,0
+ hellaswag,acc_norm,0.5771758613821948,0.00492998369279507,0
+ piqa,acc,0.7464635473340587,0.010150090834551791,0
+ piqa,acc_norm,0.7540805223068553,0.01004733186562518,0
+ rte,acc,0.5306859205776173,0.030039730592197812,0
+ sciq,acc,0.816,0.012259457340938577,0
+ sciq,acc_norm,0.734,0.01397996564514515,0
+ storycloze_2016,acc,0.7156600748262961,0.01043161412866525,0
+ winogrande,acc,0.5722178374112076,0.013905134013839953,0
evaluation/nodups-results_lm-eval_global_step52452_2023-01-21-08-16-27.json ADDED
@@ -0,0 +1,87 @@
+ {
+   "results": {
+     "anli_r1": {
+       "acc": 0.332,
+       "acc_stderr": 0.014899597242811483
+     },
+     "anli_r2": {
+       "acc": 0.337,
+       "acc_stderr": 0.014955087918653607
+     },
+     "anli_r3": {
+       "acc": 0.3516666666666667,
+       "acc_stderr": 0.013789711695404792
+     },
+     "cb": {
+       "acc": 0.30357142857142855,
+       "acc_stderr": 0.06199938655510753,
+       "f1": 0.2584656084656085
+     },
+     "copa": {
+       "acc": 0.79,
+       "acc_stderr": 0.040936018074033256
+     },
+     "hellaswag": {
+       "acc": 0.4444333798048198,
+       "acc_stderr": 0.004958872288442143,
+       "acc_norm": 0.5771758613821948,
+       "acc_norm_stderr": 0.00492998369279507
+     },
+     "rte": {
+       "acc": 0.5306859205776173,
+       "acc_stderr": 0.030039730592197812
+     },
+     "winogrande": {
+       "acc": 0.5722178374112076,
+       "acc_stderr": 0.013905134013839953
+     },
+     "storycloze_2016": {
+       "acc": 0.7156600748262961,
+       "acc_stderr": 0.01043161412866525
+     },
+     "boolq": {
+       "acc": 0.5425076452599389,
+       "acc_stderr": 0.00871339478784262
+     },
+     "arc_easy": {
+       "acc": 0.5589225589225589,
+       "acc_stderr": 0.010188293221040569,
+       "acc_norm": 0.494949494949495,
+       "acc_norm_stderr": 0.010259260102565853
+     },
+     "arc_challenge": {
+       "acc": 0.24829351535836178,
+       "acc_stderr": 0.012624912868089762,
+       "acc_norm": 0.2713310580204778,
+       "acc_norm_stderr": 0.012993807727545796
+     },
+     "sciq": {
+       "acc": 0.816,
+       "acc_stderr": 0.012259457340938577,
+       "acc_norm": 0.734,
+       "acc_norm_stderr": 0.01397996564514515
+     },
+     "piqa": {
+       "acc": 0.7464635473340587,
+       "acc_stderr": 0.010150090834551791,
+       "acc_norm": 0.7540805223068553,
+       "acc_norm_stderr": 0.01004733186562518
+     }
+   },
+   "versions": {
+     "anli_r1": 0,
+     "anli_r2": 0,
+     "anli_r3": 0,
+     "cb": 1,
+     "copa": 0,
+     "hellaswag": 0,
+     "rte": 0,
+     "winogrande": 0,
+     "storycloze_2016": 0,
+     "boolq": 1,
+     "arc_easy": 0,
+     "arc_challenge": 0,
+     "sciq": 0,
+     "piqa": 0
+   }
+ }
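The CSV above is a flat view of this JSON: one row per task/metric pair, the matching *_stderr in the err column, and the task version from the "versions" block. A sketch of that flattening, with file and column names assumed rather than taken from the original tooling:

```python
import csv
import json

with open("evaluation/nodups-results_lm-eval_global_step52452_2023-01-21-08-16-27.json") as f:
    data = json.load(f)

rows = []
for task, metrics in data["results"].items():
    for name, value in metrics.items():
        if name.endswith("_stderr"):
            continue  # stderr values fill the err column, not their own row
        rows.append({
            "task": task,
            "metric": name,
            "value": value,
            "err": metrics.get(name + "_stderr", ""),  # empty for cb's f1
            "version": data["versions"][task],
        })

with open("flat_results.csv", "w", newline="") as f:
    w = csv.DictWriter(f, fieldnames=["task", "metric", "value", "err", "version"])
    w.writeheader()
    w.writerows(sorted(rows, key=lambda r: (r["task"], r["metric"])))
```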
evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1,133 @@
+ {
+   "results": [
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "bleu": 0.28029328196894115,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "bleu_stderr": 0.02480184024920872
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge1_precision": 0.06329690360390738,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_precision_stderr": 0.001387935330246365
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge1_recall": 0.31421418416573627,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_recall_stderr": 0.004791161650456045
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge1_fmeasure": 0.09746309685284041,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_fmeasure_stderr": 0.0016849621638654459
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge2_precision": 0.027137503163609716,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_precision_stderr": 0.0007651343564527364
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge2_recall": 0.1401257111559608,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_recall_stderr": 0.003241586954388128
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rouge2_fmeasure": 0.04223874039871612,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_fmeasure_stderr": 0.001058848391939766
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeL_precision": 0.05932466454492388,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_precision_stderr": 0.0012690344678080382
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeL_recall": 0.29457496075642914,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_recall_stderr": 0.004394633326456139
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeL_fmeasure": 0.0915089018847029,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_fmeasure_stderr": 0.00153588410518572
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeLsum_precision": 0.060080846441887166,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_precision_stderr": 0.0013314718589465604
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeLsum_recall": 0.29569333412156357,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_recall_stderr": 0.004414337835118563
+     },
+     {
+       "task_name": "GEM/web_nlg_en",
+       "prompt_name": "PALM_prompt",
+       "rougeLsum_fmeasure": 0.09243599692729336,
+       "dataset_path": "GEM/web_nlg",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_fmeasure_stderr": 0.0016105636568568666
+     }
+   ],
+   "config": {
+     "model": "hf-causal",
+     "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+     "task_args": "",
+     "num_fewshot": 1,
+     "batch_size": 16,
+     "device": "cuda",
+     "use_cache": false,
+     "limit": 3000,
+     "bootstrap_iters": 10,
+     "seed": 1234
+   }
+ }
evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1,133 @@
+ {
+   "results": [
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge1_precision": 0.18474112904164633,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_precision_stderr": 0.002267407181269061
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge1_recall": 0.27650424470884233,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_recall_stderr": 0.0027689275938439025
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge1_fmeasure": 0.20094467016617557,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge1_fmeasure_stderr": 0.0019315648928512489
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge2_precision": 0.04427928011450033,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_precision_stderr": 0.0010900420762454247
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge2_recall": 0.06774300806074234,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_recall_stderr": 0.0015532556924402066
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rouge2_fmeasure": 0.04775726799786922,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rouge2_fmeasure_stderr": 0.0009924171917991007
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeL_precision": 0.1403989637840109,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_precision_stderr": 0.0017027888022148028
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeL_recall": 0.21498569513652452,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_recall_stderr": 0.002236435710308492
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeL_fmeasure": 0.1533878305154272,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeL_fmeasure_stderr": 0.001403955983191417
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeLsum_precision": 0.17171619714033193,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_precision_stderr": 0.0021119378832536255
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeLsum_recall": 0.2581270017082987,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_recall_stderr": 0.0026139033957512238
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "rougeLsum_fmeasure": 0.18704146707991842,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "rougeLsum_fmeasure_stderr": 0.0017997359690437693
+     },
+     {
+       "task_name": "GEM/wiki_lingua_en",
+       "prompt_name": "tldr_en",
+       "bleu": 2.3426766870517794,
+       "dataset_path": "GEM/wiki_lingua",
+       "dataset_name": "en",
+       "subset": null,
+       "bleu_stderr": 0.06168807200033554
+     }
+   ],
+   "config": {
+     "model": "hf-causal",
+     "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+     "task_args": "",
+     "num_fewshot": 1,
+     "batch_size": 16,
+     "device": "cuda",
+     "use_cache": false,
+     "limit": 3000,
+     "bootstrap_iters": 10,
+     "seed": 1234
+   }
+ }
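All of the agg/slim/examples artifacts in this commit encode their run settings in the filename as dot-separated key=value segments. A small illustrative helper (not part of the repo) for recovering those settings:

```python
def parse_run_name(filename):
    # e.g. "slim.limited=3000.model=lm1-2b8-55b-nodups. ... .json"
    # -> {"limited": "3000", "model": "lm1-2b8-55b-nodups", ...}
    stem = filename.rsplit(".", 1)[0]  # drop the .json/.jsonl extension
    settings = {}
    for segment in stem.split("."):
        if "=" in segment:
            key, _, value = segment.partition("=")
            settings[key] = value
    return settings

name = ("slim.limited=3000.model=lm1-2b8-55b-nodups.task=GEM-wiki_lingua_en."
        "templates=tldr_en.fewshot=1.batchsize=16.seed=1234."
        "timestamp=2023-01-20T22:42:14.json")
print(parse_run_name(name)["task"])  # GEM-wiki_lingua_en
```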
evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "bleu": 6.208120134028529,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "bleu_stderr": 0.1090255908153869
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_precision": 0.31731355835055003,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_precision_stderr": 0.004099274504817032
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_recall": 0.3324854292034225,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_recall_stderr": 0.0036680738174009278
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge1_fmeasure": 0.2919082916343828,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge1_fmeasure_stderr": 0.003144827416065499
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_precision": 0.1359171818938097,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_precision_stderr": 0.0024425949074616932
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_recall": 0.13681320433598393,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_recall_stderr": 0.0021042970297826253
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rouge2_fmeasure": 0.12033860836317081,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rouge2_fmeasure_stderr": 0.0018255484904436405
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_precision": 0.24305871613826696,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_precision_stderr": 0.00324135869732354
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_recall": 0.2577033439038558,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_recall_stderr": 0.0029570712013500815
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeL_fmeasure": 0.22283049594155988,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeL_fmeasure_stderr": 0.0023764897834232346
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_precision": 0.2640833510596156,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_precision_stderr": 0.00363828067497133
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_recall": 0.273011704389814,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_recall_stderr": 0.0031687493131647084
+ },
+ {
+ "task_name": "e2e_nlg_cleaned",
+ "prompt_name": "generate_text_restaurant",
+ "rougeLsum_fmeasure": 0.2404191146065513,
+ "dataset_path": "e2e_nlg_cleaned",
+ "dataset_name": null,
+ "subset": null,
+ "rougeLsum_fmeasure_stderr": 0.002734721442980669
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 1,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
evaluation/slim.limited=3000.model=lm1-2b8-55b-nodups.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-20T22:42:14.json ADDED
@@ -0,0 +1,133 @@
+ {
+ "results": [
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_precision": 0.11158867971698037,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_precision_stderr": 0.0016342679910338715
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_recall": 0.27563712740580626,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_recall_stderr": 0.003703133787971269
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge1_fmeasure": 0.1568899973617697,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge1_fmeasure_stderr": 0.002189141052983194
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_precision": 0.018936864961514067,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_precision_stderr": 0.000818057289793505
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_recall": 0.04792744421310345,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_recall_stderr": 0.0020733476414230333
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rouge2_fmeasure": 0.026811270052396363,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rouge2_fmeasure_stderr": 0.0011483653848396338
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_precision": 0.09051718542621195,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_precision_stderr": 0.0012785710611048916
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_recall": 0.22518909744083276,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_recall_stderr": 0.0030139050451273614
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeL_fmeasure": 0.12746226399797717,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeL_fmeasure_stderr": 0.0017205425134826698
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_precision": 0.08996878492074459,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_precision_stderr": 0.0013092110765833764
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_recall": 0.22428273464986798,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_recall_stderr": 0.0031314258481570214
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "rougeLsum_fmeasure": 0.12677328059989124,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "rougeLsum_fmeasure_stderr": 0.0017734421929659534
+ },
+ {
+ "task_name": "gem_xsum",
+ "prompt_name": "article_DOC_summary",
+ "bleu": 0.9955908989230836,
+ "dataset_path": "GEM/xsum",
+ "dataset_name": null,
+ "subset": "",
+ "bleu_stderr": 0.07407336954067296
+ }
+ ],
+ "config": {
+ "model": "hf-causal",
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-nodups/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+ "task_args": "",
+ "num_fewshot": 1,
+ "batch_size": 16,
+ "device": "cuda",
+ "use_cache": false,
+ "limit": 3000,
+ "bootstrap_iters": 10,
+ "seed": 1234
+ }
+ }
global_step52452/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:787116bd311bfa10becf061add157a86a22ebcb37dfc57ac4fda19cb6d5196fc
+ size 131677719
global_step52452/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2089d9c2671e959a6dba04afdd5c45f6c37ed54be6442a6287e47bf8c6b29ea
+ size 131677805
global_step52452/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8abe5ff3e2089debb15c65dc88a67581df74f3dc0e02a3b30592e1e7ee53de3c
+ size 131677741
global_step52452/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:781929a0838babbf60d761170db1965c53b569521c6a2eb27556eb7b3caa1760
+ size 131677741
global_step52452/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:357a6ff0b0f8409dd714f52a1978324d2c989d9f40a769e69aa484e30e106552
+ size 131677741
global_step52452/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37658f2eb228beafca97dac455f92fb3989b7d0cf13ec8f6a586c992b3692565
+ size 131677741
global_step52452/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8a4ddb32524c30af9e104a7d0ccc02114a66c244a762b8231c2625ab21d1b18
+ size 131677677
global_step52452/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec70007672a52b1a57eaa6711017a634117fe5ed75c79aad3f5c6f23a0e1fea5
+ size 131677741
global_step52452/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a323c42be41ea31dbc2b65a5bbf594881f61af678e8f537f3715274044fceb28
+ size 131677805
global_step52452/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a55966272b34082785f3d378c1d72619cfca55e9e6232b80d7dedaed06678bfc
+ size 131677869
global_step52452/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1a1c8973a86b9b7a95d53b3db3af1035c2cbeb9faac6c295d6208a0f38ceff4
+ size 131677741
global_step52452/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b8b1383322c010532195b3a2b2cd28bc65191e2db742efb9f78082a661acc5c
+ size 131677794
global_step52452/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa9dcce6be45faf0f8e6b203dbb1fc863afa5c25841aa8741c7cc60f61d2d43c
+ size 131677741
global_step52452/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:012e31e784d79a20394930d5ae9dc689fb27e1a7d53b8d6dc809ca977ebab1ed
+ size 131677741
global_step52452/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aba7f6d9bdce9a69e6a1063033e15e583d4c580a2ad1e2f75659cf341fe2fede
+ size 131677741
global_step52452/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6d39ad86c06d9bb38599cfd068c08fa964a65f1a13c46c0aa14b8dc36021cf4
+ size 131677805
global_step52452/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba760f5574164e26e192865b53eaf83fc38d68bad2f031b1979fcbf16c57a46
+ size 131677677
global_step52452/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7335ac5095060cdab6f8cb6e3cc8a2b8416220db52f11e9c9fd6b4d95c0bae63
+ size 131677805
global_step52452/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be644e89f90da6a0e6e46ce6cf19f14fb91f85193474f60f7e9604d12ede8a05
+ size 131677805
global_step52452/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dc0cb2a9185b948b519d3bb34c92b41999895478e484ee37de609b2722b914a
+ size 131677677
global_step52452/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3939cee12fedf69c4aa80c70c55f40816e1175014c1b50574bfea1c827cc95ff
+ size 131677741
global_step52452/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c3fd12c301faae6f1716254e6214f0c4655a0c13a3bb1877ca84bc2cfd3c0ba
+ size 131677741
global_step52452/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f4fc619cbf68753e50c9ee9e0947fca5aaf5ea6a42667bd20810cd14c717b91
+ size 131677730
global_step52452/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:629755caf57159ff657af603f6b7861e7d6315a267ab5cb33f19e0235e9e0dc0
+ size 131677677
global_step52452/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97784dd0e50d8c08a935cd6f7dae5155242c9fce55b9b756b3f00ecf8213bc18
+ size 131677805
global_step52452/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bd148ee37d89504738ab45dfa8ee365e162e16d3f8cfc9d34c8fb92a53d7cd9
+ size 131677741
global_step52452/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a741dbc6d3478edb4398a40c73818a2a8fefb2b478c753e64546472b1f7f97c9
+ size 131677869
global_step52452/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:095d9e21140105edff7bf7ed24fda024c5bda12ce195b3f915fd86e2bf006204
+ size 131677805
global_step52452/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fae89dd4d6f84072d796daa837481320c355e4964eddad25525d8c230f01a755
+ size 131677741
global_step52452/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc3a60294562d0c1b5b876527b6547e5195afadc6625fd95f7a420e6d9ca58eb
+ size 131677805
global_step52452/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45085f64177a1e90eb6158db3b46179ebdf1551ca0eb17af8d13cd06a1d1c9f9
+ size 131677677
global_step52452/bf16_zero_pp_rank_128_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba5262fe9f5dd52bd35da5a016166122c29b4ddbf963bdc30965902e0d756040
+ size 131677741
global_step52452/bf16_zero_pp_rank_129_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9346378b9adf56f95bb8c134371955cae3a83fe45b4c2119076677ce64b9dd9d
+ size 131677677