diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..b52fcbe888edbc2c2c12db342caa1fbb2b2f7f7c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2304122240550486, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02282542594810443}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.060241646379365375, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018281076733804075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24592443008643936, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00451257736641835}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.088654136660333, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018667826313676483}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.028611694237188352, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001256141851839684}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1211577510207888, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0030274510689492543}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04185850357277026, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011315772112214053}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.059215633929813966, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017815112040087317}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2428343490294157, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004471374721999319}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08728570338341435, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001811977941125175}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.057472515102994094, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017658517107930081}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2343449114940915, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004180785536759541}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08440847938326362, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017495023319111394}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9937b6b98d9f5f518fe52226417bda16e0785185 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.24093765005017717, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0230030873247297}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06608421860053589, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002336019334031938}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21220436916634114, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004053789065265438}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08727381267513917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002019085465483018}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026958060365485183, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014325663154050872}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09594460677384929, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002636689924376356}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03690045692783099, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011932344773442297}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0627130320531275, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0022019609556712485}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20593946270610541, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003942128258116945}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08339382486023829, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018750612270331736}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06314633303729772, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00225899090820857}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20495711011670537, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003873468183707786}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08340965215556247, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001888288916759086}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f8290554b3b5c90ce3495be05174da5b1adddc95 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.233031239019028, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.018622989143777895}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0669033461265895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022457356623633616}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2210311245370995, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004136964100813092}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.0894179415607547, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020679188927635165}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02737441536683152, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012163404235599126}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10228061138551528, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027171296640774193}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03847532772409176, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011774128694059217}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0627740873189878, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020419307238977585}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21409346975466498, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004009271610351055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08484682008716848, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018654847610348828}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06306585978755326, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002078016863885293}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21306042006535458, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003959496879716326}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08486627231660553, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018862109332949393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3baef5eb3dec305164b3929418e40b42bbdcc532 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.23948204833652226, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02987129954951102}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06118912078285516, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018874100598924987}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21690381058073044, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004037786222557515}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08516966035997384, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019653410958603845}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.025691103456522035, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009761929133606694}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09913747741149387, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026775514547381393}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.036941223370484465, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011451903532238477}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.057680973751972266, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001710672833760511}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21034092384649808, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003897565257297013}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08113455126274569, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017918361111978508}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.058077399704829655, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017441871631557635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2102632726423538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003885092717560431}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08135704576559996, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001809777621388229}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b07394ca7ed8d4ee47f8d8df27c6a19af567bda7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.23249910495908727, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.029070914247746167}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06371462011124131, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019856815389907496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2217109306879308, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003947138405833181}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08752577226961719, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001988775968059262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026831790674440324, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001147287521474585}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10130367868890543, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025744699017443726}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03752454514172853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001129399736645277}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05966276178114979, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017451674391315488}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21514059835508997, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038265964816655085}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08315980831393469, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017913382350644002}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.060216714540077834, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017985198514999731}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21496362989260864, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038038383785956613}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08355071339347263, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018285792853000794}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..12834e95edcac8bd1d756e7d6db99e01e12e64de --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.22081286066426478, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02274325142571731}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06624132704477384, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002139644596424952}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22692456733410507, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004091333470382997}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09147989131946721, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002205390270555857}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02768138829423821, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011951077136256075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10373515662428917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027182295029531743}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03903350582230713, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00122131545348737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06135390440151855, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001830142897495819}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2187629645915043, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003916988813854803}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0861662328266173, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019549257073660665}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06154502173900534, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018661465944842495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21792767487533674, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038849552847416537}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0861577572212862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019790861267728415}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6966829aeecef94ea933e8f62785dd0908759b52 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.021212341576732085, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007573290174140969}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.15394872315339908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025907246130094292}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.03406862468978302, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007291397354522542}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.0007330327285282334, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00013102839162176146}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.0060791270967935854, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005096344336408659}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.0010295704236990917, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 9.29161964969125e-05}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.020816161702046237, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006635945167961655}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.15359448098622203, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00258622832573184}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.033775131528634185, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006877591126027594}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.014879313282812858, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006265155007215373}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.11208211458026511, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001969123293960845}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.023579045736592925, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005251315581581948}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 0.005621069280208802, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.00010070462717428731}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7c48393216a7c97b87d6fa594b510908cd820691 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.1725932493840725, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002529386481031639}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.1604306963623193, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002484650213252913}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.14612427698267197, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018059192168654533}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.018139617142187602, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001178810295076615}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.017838294270011764, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010680407233547252}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.015048493432535332, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008510426237769514}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.1452889883810266, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021500071787494027}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.13484344118305408, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020730900587942985}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.12193728452169696, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014342614701120422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.1538719868843642, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0022759439069310527}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.1423996572873628, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002170737056924197}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.12940784544932987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015587985285077882}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 0.7928900478653991, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13053518516004906}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json new file mode 100644 index 0000000000000000000000000000000000000000..3b5512469f95bda0b0a84b9cc9bfae4bf59c9bad --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.21086754974453098, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003481087853726294}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.25038766375814814, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004322057797344624}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.1907465620653806, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0025141994758677614}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.046020553876655636, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002081339235092095}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.06454664623813919, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024885004130050735}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.04229150796312311, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016292319117673752}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.17652392027199135, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003026510309888636}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.2138167297173281, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003888991530572478}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.15945985445699104, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00211755282820529}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.18652878286693028, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003173835353071881}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.21970003626695772, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003760387533497808}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.16758058695841305, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002234265089238999}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 1.035688482397717, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07979180056283594}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d4a3e0d9a11308f4cc207f550429ed8566149652 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.24945289061580536, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004234785605850604}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.27695089051506727, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004409164518913342}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.2155827425525286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028813429865885256}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.06789221743946505, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002570766391550549}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.08374766031914743, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002770294903947266}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.05870693940495815, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019495378220205084}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.20844479328303336, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0036153889991536138}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.2362160338231045, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003930516775517815}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.1799141924288802, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0023927111472121488}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.21975278899164405, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0038037794972502884}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.24068798176788908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037486405001691237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.18810543014252795, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00247964907125972}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 1.4760612258997623, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1344204674137375}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4080359bbf269af6c2fe14f22d76af5407353cac --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.2843113295309021, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004668202372322337}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.2840533638331491, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004212420357643946}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.23277564344319238, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003033324883659851}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.0855143860050401, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028864190680247532}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.09058538636969998, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002732539141359347}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.06941913517884618, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021154296895269154}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.24061329765622666, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0040548049932821436}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.2451732467444115, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003800935008924143}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.1969438842373623, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002591786476274177}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.2513996234316726, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004211644541437699}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.24901463870097312, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036333762169696126}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.20445884584732335, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002660931367820124}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 1.4525772102533077, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09991573236218197}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json new file mode 100644 index 0000000000000000000000000000000000000000..90270a61dad262c2e4d664c320612eb4dd3b18d8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_precision": 0.3079953699411845, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0052262700820691855}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_recall": 0.2990488948523553, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004288889844947112}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge1_fmeasure": 0.24428760561857069, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032770994487209613}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_precision": 0.09878776942732348, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003295722862390014}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_recall": 0.09931014156182742, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00285802935368016}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rouge2_fmeasure": 0.07628545628688663, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0022936944399277277}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_precision": 0.26091647796513057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004518606930149643}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_recall": 0.25924988114075476, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003916835233501827}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeL_fmeasure": 0.20682176993182852, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002772758722894698}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_precision": 0.2723426426961616, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004751841751952048}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_recall": 0.26102854194007674, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037065483496949393}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "rougeLsum_fmeasure": 0.21367093331703443, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0028579521145505734}, {"task_name": "GEM/web_nlg_en", "prompt_name": "explicit-graph-description2", "bleu": 1.5535486678139987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "afeec167-f75f-4687-a775-1efde7d04780", "prompt_jinja": "{{input | join(\", \")}}. \n\nThe above is a set of subject | predicate | object expressions separated by commas: \nWrite all the information in proper sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12636178897739148}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..cbfbb3674d5ec938a3d5d9f6d5592063afbbe4e8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 0.06678968022431767, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.013291302987332878}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.029914442023692294, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013899685699484238}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.17191853554387962, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027258688235086617}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.04180906676802502, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000892515049715985}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.002770987452397337, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00026930245436038245}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.018458908430645037, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012055847911475867}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.0043358346468287505, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003699728130996654}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.028773567135668157, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012807801393214825}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.16962928822415665, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026788882785198187}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.04069689961336112, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0008129334087216642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.023429395403589064, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012857906320802946}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.13119208466933271, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020710838890581453}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.03168737424504007, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007282033005749001}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..133ffeb26b7257aa7446d0413e44c0480e6fec5c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 0.7343903998488392, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1264580334548859}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.1713694988441509, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0025683178287328103}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.1620927633038664, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026004642336094302}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.1451954506401401, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018476593880312048}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.018149248560522465, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011980307264669568}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.018701683687898776, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0011131166018444143}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.015147654240964752, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008531589600636162}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.1438174575375414, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002179050759564507}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.13648488466088793, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002207259216754237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.12085106733704139, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014628495715754606}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.1522841828379344, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002306578353042297}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.14316734345889626, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002243661271505656}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.12811642696509545, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015874918061787038}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f0b6e51d3988493a5c54f7de8cdb3918152abeac --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 0.8950469728314032, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06813864403493262}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.2245739569864698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00394768209564399}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.2605817818040319, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004397225408663707}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.19598922586079662, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002726474308866776}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.05842649937474419, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025228225800757995}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.07857813168613167, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002700572476720445}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.05131564722525416, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018242084011574487}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.19008397370997976, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003446693936021226}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.2280794378801942, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004143086530714078}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.16612890671250619, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002321135524749462}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.19875997968955347, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003614311300045863}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.22698955916927938, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037471810061475252}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.17165207996551565, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024000508163840063}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..aee0703c1e004589dc9186ab1b4e089a4a5f641d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 1.0328999153691525, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15218614003775394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.2561698960860146, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004586388937239763}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.2889121821334627, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004530658302121611}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.21578406129162228, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003014899947915645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.0756534526059316, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028609493658018873}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.09619008514491978, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0029113141102096165}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.06387420889427227, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020202475334694754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.2162947950404591, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003924794592166209}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.25398159222153943, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004265191984478587}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.1831681982588611, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0025326526566856545}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.22494600706563603, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00410385917148987}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.25165737151714396, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038988712792617612}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.18805417860233245, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0026019608331974494}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..600da82668f321792489bee607dda98a71bba7b4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 1.1848438797934058, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10113709897455576}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.29191494117074934, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0048753981466979806}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.2982482419805292, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004362410145983633}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.23507855170285297, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003052960609273728}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.09264180506664611, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0030441647048234763}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.10366534795774687, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002886282274864994}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.07429307318653183, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00210574121245154}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.24944159316145484, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004252142505917989}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.26299891956132837, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004120440439329194}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.20109163102106464, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002579325282186586}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.25743050877782325, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004415034565833493}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.25969769735732884, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003739952618361054}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.2054154975263721, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0026590981957875476}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..307dba7a2605b6577b9b1a858237bb4950be1db8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "bleu": 1.5291942430627856, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12407405811462165}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_precision": 0.31817552785971276, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005410918975610136}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_recall": 0.3120825384968243, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004423006280320729}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge1_fmeasure": 0.25074445308374155, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003280258168301159}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_precision": 0.11020791501317337, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0037693744168018546}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_recall": 0.114958415897581, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003095157786907941}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rouge2_fmeasure": 0.08476444821349621, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002405571742153227}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_precision": 0.2709202785200124, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004714816857530811}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_recall": 0.2753613124108194, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004151926334002586}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeL_fmeasure": 0.21469809323475145, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002784872235569171}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_precision": 0.28086204710118207, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004933792503425808}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_recall": 0.27229001474697245, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003806628830915923}, {"task_name": "GEM/web_nlg_en", "prompt_name": "implicit-graph-description", "rougeLsum_fmeasure": 0.21938893610824683, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "38342608-5cd7-4ce7-b2e1-905ecd7f4c80", "prompt_jinja": "{{input | join(\"; \")}}\nThe above is a collection of relations. Write descriptive English that contains this information.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0028754942341362322}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6872e143e5ee36fb2ef5a5fee362b87dc2b9ac76 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.048154071638123803, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017394492115037211}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.1603960438769496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002808863528216255}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.054872501275754215, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011675161117053734}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.0050437692571547055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00041097690347038956}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.020874718274915008, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00145782883855474}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.00644935707690507, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00048277790299858106}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.044143178163838545, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015820050437832095}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.14857144649500334, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026359059490226556}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.05035459707061074, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010577917832862004}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.04435825561436467, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016624296413482802}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.14581597687921308, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024815309695857325}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.04968291242100845, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0010556937198168178}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 0.06942821394863977, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.00721092856708907}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8938d539b604227d52a4e51ac80a8b87e2b6bbfa --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.16455416189125624, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0025710885946127505}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.2181617130834446, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004232534637085028}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.15006174394417673, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019223053607641332}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.023103353875520607, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012429378612023923}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.051688701593469964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024769333300300395}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.023945131921722818, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001073921369797315}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.13925189436630736, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021920195516649307}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.19238762602768703, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0040704144827113}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.12712565779418603, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015763640466711865}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.14613811038014662, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023432542987913535}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.19219266107645588, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036970487118417475}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.13222747966833925, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001685437840262139}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 0.7207771490116592, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10258109032487232}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6d1716ed35a549c3780d8f2bd9ab1bd8d8c163c1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.1840518032934248, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033695422988167393}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.32294580089178854, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005293640122083205}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.18077149568804796, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024357765771144814}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.048339216717895615, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021060002829076923}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.11482202688362075, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034238195259001535}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.050683857012723176, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016201788737892754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.1584513791779987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002944143746092905}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.2930866134885274, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005175150990392834}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.15676881729864572, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002052846994821611}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.163518547765267, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003098199168660185}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.28299351899056346, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004588174178628674}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.15894468731506023, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021537616662293205}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 0.8172765885695147, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.050751327067539105}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..52903c231fd72a24bfcc8fe8333eb1e5866e9a19 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.21339019477940072, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004136854182002759}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.36172056828283894, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005192684637361719}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.20493909153322515, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002712950527519467}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.06875941454459712, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027123104753735013}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.14026102787918196, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035390177764360073}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.06763126769213473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0019242155202766913}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.18497245590804035, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0036161696226493217}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.32950137116051886, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00509054750559144}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.178978835556707, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0023052454882151385}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.18870247477597205, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0037949931460892973}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.31437773260575974, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004468573983043156}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.17901076394504797, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023948023589502633}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 0.8232200037553864, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0990062331795447}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d66ab4dce0ff4e721a830ff169e6e4157e8150f5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.24073524945767655, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004707514658959234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.37222777341340585, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005100896049800429}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.2219276041114797, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003043064081835907}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.08343573004395873, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029901511610586507}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.1518671437094746, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036105628962266956}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.07863230130745369, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002070687205307558}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.2082283461845528, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004101899636825203}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.3382840179138503, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005009409498199744}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.19296499339169593, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0025313363149249185}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.21271916694519305, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00429809264467102}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.32505704280562625, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004417521948317753}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.19372279394640554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0026569278397281402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 0.9445938185251443, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09197403951561334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f024c7203ff4fa6f9a055cf0ee6116142de3311e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_precision": 0.26082598406654584, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0051359481387504655}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_recall": 0.3729649784159858, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004970827450327891}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge1_fmeasure": 0.23144425717217612, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0031085174926612394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_precision": 0.09576839794606369, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0035521748356882993}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_recall": 0.1532169603638125, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003593558920823611}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rouge2_fmeasure": 0.08355597806644839, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002189706487328105}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_precision": 0.22583373330234002, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004534237669240004}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_recall": 0.3374628358280029, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004847122676128511}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeL_fmeasure": 0.2012929842528386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002610242315697286}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_precision": 0.23006368905799984, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00470015880813854}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_recall": 0.3255358190359428, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004336770681537931}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "rougeLsum_fmeasure": 0.2020118383660613, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002720953194833289}, {"task_name": "GEM/web_nlg_en", "prompt_name": "non-explicit-description", "bleu": 1.0828952105973466, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "9415bd8a-685f-4fa4-803a-f09bd47d4603", "prompt_jinja": "I am taking this tabular data, where each row is separated by a \"|\" and expresses a relation between an object and a predicate : {{input | join(\", \")}}. \n\nNow, I will produce a description of the tabular data using English sentences. {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08428338494461286}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..04d7441ef082726460293f7a7fa0dcf007b73e9d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.09839982038939207, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016034845059760892}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.557046636180574, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004459443954469606}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.15932996809565791, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021975666324784045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.03974822393863394, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009259214471201931}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.23724700480115124, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003530724556191307}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.0645288094675153, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013271654890972852}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.08556676553539082, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012344258273556512}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.5120237901085162, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004389079693859156}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.13994234816561368, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001713279737012679}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.08372529560944662, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001428553657612201}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.47520736805531627, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003928372595420955}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.13548370931568984, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001965481361266326}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.70556499870502, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07021163423422254}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..07d85414b3d3a12493082cb1ee564b941dc62d97 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.15192244328141744, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0026851175239504146}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.4088076221119547, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005307206147263785}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.1795535626460401, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023078723613743383}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.042264113746990004, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001519132737951407}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.15816170460414825, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003648850558319156}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.056216955504673816, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015622457935091263}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.13222718563169758, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023137442474583567}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.37772895967872394, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005250016280580306}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.15827887417485154, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019506890461920023}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.1331053325871554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0024745486155965113}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.3552035560328879, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004588945170502327}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.15584864004949936, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020620407818320073}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.7666335106831184, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0635865012850457}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..01ac442aac6402f72a4095a884e442a885fe22e9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.18082568461374282, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003639459051365589}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.4264948458628402, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004935119334379477}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.1995262410573067, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024883401069483438}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.06212406576427143, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023869401244636375}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.17467748933722366, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003536521311814631}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.07054805798392726, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016830190110589496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.15912923447907845, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0032345958330729694}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.39493138391122135, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004915557812492545}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.1770227263859978, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020868398943825295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.15978841140185437, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0033957422368174534}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.37244471989166417, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004290031616626638}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.1741607140832361, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022309499919646618}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.8036503769411473, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08352144767185858}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3c0db03dad3a9fd7bd10f505c5ad01805a713f11 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.20161323138180814, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00419614102063665}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.4284437489377629, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004747351313475394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.20998314436048413, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0026958272587435566}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.07075454828433567, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025432659534373814}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.17982051325515352, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035838481085783518}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.07655162918992574, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001828909076942692}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.17689528139425809, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0036676387595752816}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.39708608920535615, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004751490642713908}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.1863768106489585, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002259606335187374}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.1782191584681922, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0038632898410200503}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.37361714826137415, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004151105270083309}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.18356302820432102, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024163227573974415}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.9213985772786043, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09261357714373215}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e6297a00d8294132c7ef82fa9b89d33a2799fc52 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.22590230879356132, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004885739171030689}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.4183617818367607, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004646737629595512}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.2203882766391892, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003092911335667608}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.08369577119049203, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029217645456625545}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.17612573754542205, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035387797965567075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.08291524084048744, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001997647183245828}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.19700967528722899, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00420918433484521}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.3860053232564878, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004602487402565482}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.19451918196713988, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0025549062189435443}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.19774103481256436, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004390815504231839}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.3641519676371665, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004085150565177179}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.1912941284472393, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00269200231433118}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.8669579841221637, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08180503550292748}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e2315a8686caea1efb1b44cbdd76e3dadd52bd0d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_precision": 0.2333099109106658, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004918167379356697}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_recall": 0.40468387245463144, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004615290964853879}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge1_fmeasure": 0.221868544853033, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003062043218874345}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_precision": 0.08575749645339896, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029475165939869268}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_recall": 0.16978922632954452, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00346238061542852}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rouge2_fmeasure": 0.08265916020277501, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001971954733869372}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_precision": 0.20506533085409256, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004283642129477899}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_recall": 0.37349518444433055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004568965644534639}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeL_fmeasure": 0.19660756094703621, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0025597130950611555}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_precision": 0.20613211140017276, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004444626909561573}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_recall": 0.3539709992804558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004068457607282714}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "rougeLsum_fmeasure": 0.19418171975741957, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002689265498940047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "very-explicit-description", "bleu": 0.949825770028968, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "426b682e-e801-4e8d-9ac3-5b676c9d3da2", "prompt_jinja": "A semantic triple is the atomic data entity in the Resource Description Framework (RDF) data model. As its name indicates, a triple is a set of three entities that codifies a statement about semantic data in the form of subject\u2013predicate\u2013object expressions. (e.g., \"Bob | is | 35\", or \"Bob | knows | John\"). \n\nA graph can be formed from a set of these triples. An example is {{input | join(\", \")}}. \n\nWrite grammatical text expressing all the relations succinctly and fluently.\n{% for i in references %}\n ||| {{ i }} \n{% endfor %}\n\n", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07260355364195568}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d0af02613af7c3bfb72d066177257860bee9cba8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.1975977152505175, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019210439152713556}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.3571450155403217, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027159219787335877}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.23668156796351772, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017834284452177681}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.04679995732393755, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008453000647757492}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.08872427020642143, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001757359421073499}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.056425445419599346, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009610968832196708}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.13079025220025553, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001163188240810311}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.24818548845368613, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002206255979336452}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.15899265084812766, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001137037008281727}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.18296614395904412, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001771259927022808}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.3318205839417274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025527759940675494}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.21937178453510964, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016467565432194853}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 2.526005202191506, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.058273418733684156}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e964914d81a68ff881f12c1f46d500d0a26b28b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.15054373183476955, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001998147829744307}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.22938700618092542, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003041552612274185}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.1625536760242897, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019424044371032676}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.026747967720227677, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008081761640449396}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.04688223062252883, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001502981154526947}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.03048470823212383, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008439966461249648}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.11130833909157777, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001381727326473868}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.17251099505438725, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002267024463807693}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.11990114200505439, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012809983090350478}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.14096779097971843, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018582505585400385}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.21419795786755574, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00281418674755583}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.15185020212271533, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017871174330384631}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 1.7313590804890209, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05908907418295776}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f0820f1a9472f7456350f1633effa3ab419b6a57 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.14664845124249912, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021074927136243747}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.22029309843953365, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002950858143159719}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.15659972337101613, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001906663384031481}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.025832066720273383, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008715375341951669}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.04297038579858274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014465923870447984}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.028283684834043706, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000821620933532687}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.11186134311345497, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015665677693472357}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.17054780622395707, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002242184695689906}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.11924857854706734, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013235141134357415}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.1363376415997164, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019672230168232964}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.20473556515895666, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002733779107620804}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.14532637028195308, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017544389892467619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 1.5124106778411996, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06141046632956911}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..10e5a0c5d25f13b3a5084085e9525efcfa4376ca --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.1237560514705609, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002282039852755364}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.17609270530427382, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030210209846962413}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.1258659396327009, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001992963699044068}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.020954462249452663, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007965730554829935}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.033657403446598254, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001329371810945043}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.021957452841427055, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007360198417813418}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.09658445599443373, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001753215991240076}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.13975525526667698, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023736809100178683}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.0981193897316493, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014598347738579937}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.11453614366434078, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002105151678823837}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.16338969575022716, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028019873925272075}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.11650869217135837, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001832893857886487}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 1.3802963074575254, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06732152627137003}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..45403ff1d83ce3de8a01d3bba93e76978fff344e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.04395368484664138, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018424926390462097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.05774517711149188, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002305158478827759}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.04204648300277925, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016015557898894717}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.00844841454877157, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006603987371609425}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.012177055063554206, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009005039320534978}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.008202499844200142, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005608948554894471}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.035216144797083765, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014914106117576477}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.04621071195954536, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001796318755601515}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.03339087455481403, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012365521644087543}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.04090439759741698, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017246420675129577}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.05360227908215058, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021376470771195598}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.0389854987222177, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001481057274442113}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 0.22377593301174895, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.022223303535369093}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..dbcb9f8a69cf52c7acc7ba2fbf50468f9536883c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_precision": 0.006968711039755044, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008981875218117217}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_recall": 0.009045805133122158, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0010585291219916539}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge1_fmeasure": 0.00624650026917102, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006896033744358352}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_precision": 0.00147397688199298, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00031382142309567925}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_recall": 0.0023038793256885807, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005038053110111991}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rouge2_fmeasure": 0.001354763281978201, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0002354570603752033}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_precision": 0.005684552101183715, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0007811869359473511}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_recall": 0.0073860095468518735, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008694725694459097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeL_fmeasure": 0.004998030899560508, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005484876409153318}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_precision": 0.006504958900325744, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008576188233041718}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_recall": 0.008412531590010163, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009903950820560048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "rougeLsum_fmeasure": 0.005785170257262004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.000640536066601862}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "article_summary_en", "bleu": 1.3197166417911808e-08, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "2038df7b-5420-4a33-87ec-09715419deef", "prompt_jinja": "Article in English: {{source}}\n\nSummary in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.839866989657345e-08}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..4be7af70d4dba5727c913b1eaa1353981a36ac6a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.09557902324303133, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015934233011554208}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.15195533247270118, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002167076868910249}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.10814475341505056, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001524767202301828}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.010021499848992905, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00042254787804722635}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.0175643734135993, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008555885514606594}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.011588030659270141, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004784186900067079}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.08020042698947122, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012460897241632469}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.13117713414918547, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018429649355817298}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.09167513906886186, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012095215627801854}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.08997576030672756, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014864753127451467}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.1439070530072875, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002057912841397234}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.10198753546847392, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014211405382431829}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 0.6145809763628965, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05797766316597433}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..90b5635e0761c75d1a696d407b1b21e139b9c898 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.11274337869180198, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015080376244132586}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.10983840856878206, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0014551714150494175}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.0979457603122117, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011364196004116048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.004607065202297954, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00029696477370218157}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.004443770455530801, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00030921576747671716}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.003914558098208722, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00023624265329673873}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.09101884007968876, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001195226939453244}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.08894440577554404, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001162325195438461}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.07861266088907197, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0008536301328765664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.10885553700903176, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014410450568103732}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.10635049542751197, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0014076769390450833}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.09467060993519806, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0010886015566558735}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 0.2408792681226857, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.024130354527193396}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e0d9cd3989968b51b76dbbb18f9c2e416ead5247 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.11450203486111951, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015512493201278732}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.11764026218464517, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0017763168006867999}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.10129546926820872, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012376127878624412}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.005990341232345204, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003778566735238687}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.007150893881572016, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005954055858899312}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.005443826668587504, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003144404879778184}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.09176826957181027, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012268770230593445}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.09444076989884408, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014215142609522416}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.08046273244596494, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009128776785211843}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.11011996762454923, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014803727480206098}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.11312057420513341, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0016882892537450165}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.09737556397488158, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011716121889168436}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 0.42527357697495255, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06953287436188474}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4a932c57b02c75dbbedc8f9dc1b67dd4af0c55ed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.10053229311714834, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018186300913397432}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.10555528455488752, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0020869634882788126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.08745924967780058, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00145734596535055}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.007244189708893669, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00047064519532168336}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.009481343391595305, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0007029880713328167}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.0066746059699027156, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000406229568602048}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.08113908472999296, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014422772176468318}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.0854911553393767, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001688161117709379}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.06994035855230117, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011004649845343271}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.09576776212352223, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017136097429588115}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.10032211171110292, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001963048810667937}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.08314051391123733, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001363454598942612}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 0.5943947138668615, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04701176872890474}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d326dcda9b89a36525a3cdcff5785112d5f3e6a8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.035781196323287794, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015508373183201957}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.03889017824317372, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0017168088677622815}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.03034948263830278, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012288540754404823}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.00393091834595338, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004260496428881099}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.0052037277375677786, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000540702114853635}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.003519391416781204, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003344402761329188}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.02946476725548691, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012750099020740124}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.03200016757522908, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0013894967755151602}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.02470528087251438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009707901157133207}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.03393706130302841, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014660077905268025}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.03664599115059629, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0016081833996057495}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.028653137000749736, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011499184993932895}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 0.06155277062691283, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.008573159858072549}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..36b23fdc62098c92fccb1e61589044e4c45abced --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_precision": 0.005466518578601791, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0006360451019998415}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_recall": 0.005849281181457089, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00074547507260093}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge1_fmeasure": 0.004552330658915172, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0005166062646006778}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_precision": 0.0007290381746496603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00017635675582582084}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_recall": 0.0008879185087143063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002685233025455508}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rouge2_fmeasure": 0.0005748508715984116, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0001533404190150482}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_precision": 0.004641407616449875, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005531411811140234}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_recall": 0.004667322692384839, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005697941762332262}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeL_fmeasure": 0.0037163176139750516, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004064756060554951}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_precision": 0.005301656522138449, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006174490006150588}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_recall": 0.005552151354486044, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0006921370486199647}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "rougeLsum_fmeasure": 0.00434986186087182, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00048298225975487925}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "rephrase_en", "bleu": 9.384428445264235e-13, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "753f0a46-aeff-4cd2-932c-8548897cebe5", "prompt_jinja": "{{source}}\n\nHow would you rephrase that briefly in English? ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.048496076777406e-11}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..5be5bc86819de8f9fa2c545eafcadefb12aeeabb --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.06083058515622332, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015146719263047607}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.08130079272174061, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0017565791128464651}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.06398795450213274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014079644213715252}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.0050127471219516785, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00033788530642644006}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.006932802618765814, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00047568921018848896}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.005239039550783603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003252538524207895}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.056280510609653965, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013608121285239516}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.07622653073256713, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016225806985423702}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.05958527780779474, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012805893801553889}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.05661921621032954, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001398882606899158}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.0762057149480866, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00164425094307365}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.05971017630902041, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013030395843137727}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 0.2622610605101529, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.040142958508706195}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..3df2424b657b4017d82e8e5997b62bd7a5e0df2b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.11326508510637766, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015009170735602506}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.1085356303020659, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0014115882108590343}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.09793700832399474, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011273790641300194}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.004465655727904388, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00028238368212925503}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.004056065876580251, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002691375832689169}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.0037395950495648657, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00022671756481262202}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.09142917649433346, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011841565139209087}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.0879166314153642, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0011318568380660808}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.07862622799687088, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0008488937771314072}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.1093895229104316, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014332473740733347}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.10513740023129456, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0013659288669536467}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.09470526990675743, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0010805621439727475}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 0.2382502920735501, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02364732083952621}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..3ac1bba81e05ba9de7bc37da5455ba053b3e4f55 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.1195188168282025, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001703506896411307}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.1202411166531759, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0016720477170315902}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.10460998387087274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012689330036383434}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.007701756834163109, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004826378231441523}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.007828279383704012, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00048735370780030713}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.006572282621218325, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00036858964749543314}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.09584725807304968, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013390205513148664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.09711441691325796, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0013255057315948026}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.08348917343043502, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009381191407596309}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.11507270211895362, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016248601270300064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.11590379555656585, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0015972136190990915}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.10076143309185726, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012074937805280613}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 0.590846188853053, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08622026318742701}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..30d1313db4efb1d050a029421a57b9e899267409 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.10561107711679257, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002135116442535178}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.10039756771582228, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018998579014479846}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.08635223714862443, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001447627888207993}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.010532137196295798, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008099525943800139}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.009664993083625497, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006422735345150598}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.00773667698720356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00044084771240741574}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.08698250921011949, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017942983133012848}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.08314557123023888, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0015976310654642362}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.07053215664409358, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011503059093946877}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.10076653862384102, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002033643570526417}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.09561293861925456, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001792928927851378}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.08220026138629714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013594564836760387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 0.5908356190981445, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04489447675590248}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7d7c1522c6d11282b31b77dae7455fc1597536e7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.035705986442768456, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001618640602215546}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.03197561607719162, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0014717555351632275}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.027402695270076872, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001157724691879728}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.00401479182015976, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00043777035338946497}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.003920400654913368, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0004172562730537138}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.00317018932471084, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00031714568787250826}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.03013013623521446, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013844671087526951}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.026445775089489882, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0011917063204159026}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.022642473398844034, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009329018940160789}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.033971789988392904, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001538890119076818}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.03041146272732838, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0013938550134374181}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.025956946800132486, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0010846296784172967}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 0.029093100164270518, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.005798545611396172}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a96001b6216e37dda200231ba0ecf54db49971a4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_precision": 0.00595043307989505, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.000767651406351477}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_recall": 0.004778481595486118, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006233197620300213}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge1_fmeasure": 0.00450639531355031, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000555584129480168}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_precision": 0.0012396636176066488, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00034778633785854403}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_recall": 0.0008030145717563384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00020935765891227752}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rouge2_fmeasure": 0.0007838553827958208, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00021051541322103913}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_precision": 0.005169809582323384, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006759018705997662}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_recall": 0.004164169546014226, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005467863316598697}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeL_fmeasure": 0.0039016663853958123, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000485959181689575}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_precision": 0.005734093038513017, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007492388147769349}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_recall": 0.004562529353283875, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00059727108953185}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "rougeLsum_fmeasure": 0.004298574460690419, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005305976692789258}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "summarize_above_en", "bleu": 2.233297948144298e-16, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "088288f3-7516-4cf7-9406-0e082053bf54", "prompt_jinja": "{{source}}\n\n===\n\nWrite a summary of the text above in English : ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 7.375838022248224e-15}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1574d869a5f711bd9868e766d53d4026c22f226f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10367122573898108, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016311453272566847}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1621848426933469, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002239905291843337}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.11655773215808052, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015844131287312152}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012276074546298633, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005003323251677242}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.020862324606727274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009624161724708909}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014011218070534905, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005398251030136181}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08966835876675931, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00130510256531558}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.14342974130461322, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019375952453362063}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.10161863320310922, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012820750167481308}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09728691738451277, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015087459211237015}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1530554381860451, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021142656537031777}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10956185275309588, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014665536918047877}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7280055452213583, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06972461776949417}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..16ecf493487359125fac8bacd334b9b8093fa60b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.11922048835394526, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017010312663949007}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.12373373738142388, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018173643062502098}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10548837365464835, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012991497483475392}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.008234339790533517, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005690033754874188}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.009356412797919933, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005938642159151799}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.007230676056378635, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004084061340426134}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.09596744652257369, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001368600668118387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.09975162242794412, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014345772942689061}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0843032809934075, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009705899345773655}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.11440643074566167, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001619972437724354}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.11862725022797178, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017155423340356613}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10113335490345467, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012233443711385755}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5504071981349774, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0561562373694907}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f0a2b37d1729972d8e6c3ef7c64ce83d2764bcf6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.13659331063329858, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022487372544365814}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14392803155149808, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021426965551337935}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.11914801472840628, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015610543341787234}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.017190323002243903, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009306182513579381}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.01804489565450503, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000825364925597016}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014134158871786717, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000595539839287858}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11065349886551568, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018034426665467446}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11850505866727387, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017735732164141282}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09639205333227598, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011890649378743271}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.12944158114309687, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021224960396573653}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1364172032563405, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002022193400173333}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.11276308928122987, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014568432793235883}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.0951241331559471, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06468580769617795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9042ba57aa0fd9cf7fc4d7a6f36293ad10c92f5d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1325368577188755, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027805256927643865}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.12731530185422324, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023277957556830046}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10595502188431383, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017328375397109063}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.021208434345761078, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012054076568754222}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0189911073417682, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009097509424826536}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.015114213677740143, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006401159920792145}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1098153192916977, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0023103747538012406}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.10713066473487352, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019653832434662097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08778629502724616, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013978298599760263}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.12430578995748448, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0026077066778884906}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.11938462834076304, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002179423844395387}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09922221098825855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001608236044311444}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.0702430413152522, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07113210724258658}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..393864a832fd96f9378cc3701fea4adf672135d6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.05029331061394819, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002207006374572226}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04435204949042938, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018249482984958308}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.03772701558859641, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014496259811778097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.009550867923430868, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009379034663216416}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.00752924590473579, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006210729065705739}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.006349834491065967, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000497796787181232}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.042547817990021886, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018896153572281916}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03761001948729707, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001533156465356555}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.031758388592257075, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012082854006158594}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.04733936020765825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002088413158037917}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.041282972252420995, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0016916980136133567}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03520149112819018, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013434925962965214}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.1011859793046015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.014350638137120817}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec2cf1f5f64707214a17fb43f3cac7c14ea92a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.009316160183573745, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011155532666113311}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.006701979396490877, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0007675445971480603}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.006210622496685344, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006727949372632281}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.002346103781743029, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00054243039709031}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0012899695832633785, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00031378439187027}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0011831626630347113, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0002436110293420105}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.00791303900867605, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009792864483272245}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.0056630421132579855, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0006466924999339596}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.005221351425348215, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005750924567208641}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.008929134383929085, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010845799476391675}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.006287352900516465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0007115097244397184}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.005877130570728575, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006401737602435983}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 8.867413076733536e-12, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 4.796386448449034e-11}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..262649617689ada43a9564d171bc25aa79cee55d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.11295901916552518, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021561636775618825}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.15716194752418464, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023658263657045466}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.11770828296503491, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017536210085080694}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.015052684287067302, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005896104288986676}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.023031044736046974, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009581139045387344}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.016689036256012268, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006284511806433984}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.09584165693939237, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001826823689459275}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.13488328000528105, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019094926140183217}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.09961737449184858, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001336830173088306}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.10435726439378455, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019288664257451583}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.14671369444409713, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022093912088684674}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.10939848979449732, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001611104432360681}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 0.8397297265679019, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06419556092589017}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bae329ca3bdd35ff502d12f12e8af498221cd4a3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.11280797879908812, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015176355293584295}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.10974032684176149, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0014531379840037003}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.09800099824939931, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011455058233680163}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.004669460064109281, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00029342600613201146}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.0044258058822204596, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003029421760704375}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.003943433623482069, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0002376042862947667}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.09100549212005561, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012010483184015203}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.08875940267139816, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0011563650751118756}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.07857164868748949, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0008607791775228845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.10880498670928285, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014497893036322746}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.10606739321335039, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001402205534200808}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.09458732701792698, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0010956881578153168}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 0.25575772226089033, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03569958145217157}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..492edc50f3bd2e9ca971e5240cfa3afef8829d0a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.11515560674851932, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015509282528847878}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.12300491373534446, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0017806103612246057}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.10458659667435967, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012861835501711226}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.006395365145636472, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003591958546304461}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.0077962010381853256, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005289307912376033}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.006134109611250491, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003481002830495667}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.09124703921693972, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001206351363616421}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.09741908894309344, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0013730905688191196}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.08203887538980079, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009229225262216014}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.11059022840815862, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014851063649001237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.11805167829668524, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0016988734128864996}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.10037653634062631, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012231289195270326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 0.48504301830637025, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05137507780432141}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f43fa786eb4129090ac82d97c25a5979546992a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.09166326015675812, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001863618297222859}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.09439692771782093, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002049639853878195}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.07909868477561151, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014723495062479318}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.006815303592116684, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004965126210719875}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.008260177773973776, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006508910738926214}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.006138307630609741, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003968736257003603}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.07314862524882004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014905595972838524}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.07438392591435516, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001576968342669606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.061996926537050155, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001089486193345418}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.08729866156868725, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017644387577357547}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.08978161538723402, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0019225188795508}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.07526948626877389, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013834299708953759}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 0.4671279058356674, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.046623136526393195}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..2eae276f7d1f1cb0925c3e924b5343f26d32486b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.024122525784377926, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001268480469741408}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.02373843528685194, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0012776937483044686}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.019517882288829225, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0009604584223230579}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.002090947286237258, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003051655289255735}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.0023982591853571687, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003799771234361843}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.00167678535423955, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000209849702180494}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.019682867450734345, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010315579522385012}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.01952315908954502, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0010418891548632763}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.015788003617005678, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007534854041442719}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.023011530370527344, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012021135466980504}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.02274482533911906, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0012249850245862933}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.018599345217623072, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009070972782003065}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 0.004214867695194402, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0009834876936663747}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..90e39b705590e0a8a070d3be34e19e448b9fc179 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_precision": 0.0025439911728014682, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00044089657241181616}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_recall": 0.002127710352300543, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0003543742353112539}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge1_fmeasure": 0.0019664292916783234, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0003120896330989726}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_precision": 0.0001923386207866496, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 7.972926456097242e-05}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_recall": 0.00015798104521609337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 6.680290787534892e-05}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rouge2_fmeasure": 0.00013934402915387212, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 5.871856589143859e-05}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_precision": 0.0020080907200489616, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0003261035212045945}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_recall": 0.0017331074442032213, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00028920031821567146}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeL_fmeasure": 0.00156729637433548, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00024160981216417036}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_precision": 0.0024784113984497643, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0004288948292667904}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_recall": 0.0020662127056030035, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00034094966500850676}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "rougeLsum_fmeasure": 0.001910974206326472, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0003010647724742626}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "write_abstract_en", "bleu": 1.234466237515543e-32, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "dff7b314-7385-4855-bb90-253073a34fde", "prompt_jinja": "First, read the English article below.\n\n{{source}} \n\nNow, please write a short abstract for it in English. ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 8.15926545424835e-28}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1d7da23d07535d089b16c646405491b999ba86a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.336, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014944140233795025}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.336, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014944140233795021}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1d6e29d11a291e2eccbb5643d93ac7160bbd4c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.341, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014998131348402702}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.332, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014899597242811482}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4acf102a1a6297c24f723cae45371dd38634ad --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.363, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015213890444671287}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.355, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015139491543780529}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..ab1b848bf48aaf2cca68081fc7c511b5f90eb474 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.357, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015158521721486769}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.352, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015110404505648663}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..47a7d858de0779cc6edc0967b6c18638cb92ed8c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.363, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015213890444671283}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.356, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015149042659306623}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e11fe6b14843533b0a287785d71e917306402362 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_GPT-3-style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc": 0.351, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015100563798316402}, {"task_name": "anli_r1", "prompt_name": "GPT-3 style", "acc_norm": 0.339, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014976758771620347}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..61523700f110584136adf6a3d8fd6862ccea64e3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.334, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014922019523732954}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.33, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01487687202745673}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..cc2b5dcf1eae830a86e770c6bda2dff4526cdde1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.333, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014910846164229863}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.333, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014910846164229863}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5e8ec06b76551574dff82cb8a38586d9e1d7be47 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.359, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01517726422479859}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.357, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015158521721486764}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e0a52e16f39bf883053d9ae28b804e04a72a22e6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.349, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015080663991563098}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.341, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01499813134840271}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..645c6d7b179b75dd036b379a53127461eb5d0dd8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.342, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01500870618212173}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.339, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014976758771620335}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..85d08bde786e2998dda93b5ae99aeb966305d406 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc": 0.34, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014987482264363937}, {"task_name": "anli_r1", "prompt_name": "MNLI crowdsource", "acc_norm": 0.332, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014899597242811483}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a7e8658aca1e98390d118b9a1fb526193fcae7ed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.34, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014987482264363937}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.329, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01486539538592836}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..dd6233c85ccf275b3858c62ece9b587580fbde2a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.33, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014876872027456734}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.343, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015019206922356953}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4ef7515cf64146461f6f1497818a84b306cc422f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.351, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015100563798316405}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.35, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015090650341444236}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..69fc6cd7ff052667026da83502e8c0dcc7c2228f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.363, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015213890444671285}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.354, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015129868238451772}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..5ea7b75cf2d7b8ff61e9fce1e7534751f071f2b4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.363, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01521389044467128}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.351, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015100563798316403}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7b9c81441adcb42cd98ca308bfb3d58a086bfd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_can-we-infer_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "can we infer", "acc": 0.355, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015139491543780529}, {"task_name": "anli_r1", "prompt_name": "can we infer", "acc_norm": 0.351, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015100563798316402}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..30f02a8185b89e2702dcd1444d2edee8374d82fd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.331, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01488827258820394}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.322, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014782913600996673}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d8808717287a9525f14d4d76a506fd2b0e473bb2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.333, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014910846164229863}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.333, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014910846164229863}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c6d4984c345c7eb9d346cefd2bba79b5dad82cc5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.347, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01506047203170662}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.348, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01507060460376841}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6a3826b42b6dabd8b92a7078829b1e222cc2c51f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.347, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015060472031706618}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.35, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015090650341444236}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0e7bb048a5f271c38521b4ac61021b65eda9ab76 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.344, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015029633724408948}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.343, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015019206922356951}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..00de70be50a8484fadc07e5097055b8948a75d7b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc": 0.335, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014933117490932575}, {"task_name": "anli_r1", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.336, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014944140233795023}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..7100fcfb9a71f54c260e0541cac2038b9c539cd9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.341, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014998131348402713}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.326, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014830507204541035}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..04694c43c20572d3b7115d03d2661cb5019c3ca9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.332, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014899597242811483}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.337, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014955087918653603}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6d66300ccfeeb644e7d0c7fd97c647ba677305c8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.357, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015158521721486767}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.361, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01519572011817511}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..15d03d3ffbef72d13e2f487eeed4ee899747576f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.351, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015100563798316402}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.363, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015213890444671285}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e9a5ec5cf2280a2d6266be3e86fa21fe9d72823f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.362, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015204840912919496}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.352, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015110404505648663}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f38342165512222e650ab8883bf9d43d5c63d37b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r1_justified-in-saying_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r1", "prompt_name": "justified in saying", "acc": 0.35, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015090650341444233}, {"task_name": "anli_r1", "prompt_name": "justified in saying", "acc_norm": 0.349, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 1, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015080663991563104}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2b614857128f0b9f567cf4ae0e689d23420c5293 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.342, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015008706182121734}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.354, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015129868238451775}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..473694394d95163973df67a574aadfb7ef4909f1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.318, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014734079309311901}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.317, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014721675438880217}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e597cac2e5d47424a661cfcbfe6295c695f2f2ed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.322, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014782913600996683}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.321, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014770821817934649}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..363da1d5f4d80b1bb897568336adbb688cbc1e70 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.329, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014865395385928373}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.325, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014818724459095524}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a858c40764fd2dade9d349ed52f865cf3a15a3c9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.333, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014910846164229875}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.32, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014758652303574881}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d98da76121b45bc284004f85f333f225f4bf3e4e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_GPT-3-style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc": 0.34, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014987482264363935}, {"task_name": "anli_r2", "prompt_name": "GPT-3 style", "acc_norm": 0.338, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014965960710224472}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..5bbd90685f4f47e702bc39971daf6d40da82d7c5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.334, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014922019523732958}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.335, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014933117490932577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2e63ee8c1d71358bbdeff8e377949c7077d9d70b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.315, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014696631960792506}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.315, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014696631960792506}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4957c0cffd0f5232dcbf0b0e496a129870ea34db --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.321, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01477082181793465}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.338, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01496596071022447}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7637151d98beebe3c480dbe30153b60cae9a1ad2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.309, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014619600977206491}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.311, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014645596385722694}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..c7edcbe4c1f11de1306a3c656bf30731bf8c08a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.291, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014370995982377939}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.3, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014498627873361425}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b0553baee5677937c4e1adeaec3e5475187721db --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc": 0.325, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014818724459095524}, {"task_name": "anli_r2", "prompt_name": "MNLI crowdsource", "acc_norm": 0.321, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014770821817934645}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..90aae0226df6c250f04d75ef49a824bc0a5273c5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.336, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01494414023379502}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.345, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015039986742055242}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6b38be840a0ca716647d3ca3d3b74ae1160a94e3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.331, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014888272588203928}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.33, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01487687202745673}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ec346e5b36bd14d4d3a5b06fcb97b436a448012b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.327, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014842213153411237}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.341, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014998131348402718}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..97934e85754bf0c91fbea79afc665863f9710516 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.332, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014899597242811492}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.326, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014830507204541031}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0980df62b5cd24fb730b929370a3cfa4e0858f9d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.321, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014770821817934652}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.331, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01488827258820393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6877344e05cde557997f33166ac444936bcf03a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_can-we-infer_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "can we infer", "acc": 0.331, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014888272588203924}, {"task_name": "anli_r2", "prompt_name": "can we infer", "acc_norm": 0.338, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01496596071022447}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..bddf6e9a175f3ac9742422e7307e87bd0d609d1c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.329, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014865395385928362}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.336, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014944140233795025}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a0e818bcfd7d8d2d28724f2398bf1b9523385339 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.315, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014696631960792506}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.315, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014696631960792506}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..67e7ff27bfcb9e1cf6053043250f670373d040b8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.326, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01483050720454103}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.326, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014830507204541031}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..68e2665526e03b0e7a5d735e1a3e405a8364661d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.321, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014770821817934649}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.321, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014770821817934649}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..97e6412ddab1c4e9ad3550cf168f81e054d55c3f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.313, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014671272822977883}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.314, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014683991951087973}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..50d26dd2f21f918da5489ad8252b4292dd8933ce --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc": 0.314, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014683991951087973}, {"task_name": "anli_r2", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.312, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014658474370509012}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3195265c884ec0790440bee62bfe36d13dbfee85 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.335, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014933117490932577}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.353, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01512017260548369}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d1a648a10df5a855c88acc93bc051885ba52b94 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.322, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014782913600996674}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.326, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014830507204541033}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6f6594d94c4f967f3fb8f5b00a7ad6d0328812fa --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.331, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014888272588203924}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014910846164229876}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..fc45f4ecd228d15ae4c6e523a388e6448580ed00 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.325, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014818724459095526}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.322, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014782913600996683}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..694995dfc6e4b62562412fc37a1b06b472da5b89 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.314, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014683991951087974}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.32, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014758652303574888}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf7f7217f2e5e97ff51f07dcf19d352b375c28e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r2_justified-in-saying_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r2", "prompt_name": "justified in saying", "acc": 0.329, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014865395385928373}, {"task_name": "anli_r2", "prompt_name": "justified in saying", "acc_norm": 0.325, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 2, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014818724459095524}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e7ab185b3d734fbd143da14ea5e8e1e2d8120ba3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.32916666666666666, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013570806258433625}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.325, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013526454480351028}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c1decd4934bb291300d831c99c7613e861771540 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.35, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013774667009018558}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.3375, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013655897185463652}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..df8b993fabcbc5a2b4b906a5ffd84c97cb20190d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.33666666666666667, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013647602942406387}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.3325, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013605417345710528}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..64d514716679528b4c820aebecf3b571c7f4e859 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.3516666666666667, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013789711695404785}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.3566666666666667, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013833742805050722}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..beba43f46e8d848f3102d8720dc79b16be9e961a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.3466666666666667, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013744022550571946}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.3283333333333333, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013562032919529019}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3859c0a85acb2b741c5ad002b362e1652245c932 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_GPT-3-style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc": 0.3325, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013605417345710526}, {"task_name": "anli_r3", "prompt_name": "GPT-3 style", "acc_norm": 0.33916666666666667, "fixed_answer_choice_list": ["True", "Neither", "False"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "620aa3fc-d5eb-46f5-a1ee-4c754527aa97", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01367234349168182}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d2de58ae9b2c0ce9ae96bb25be0e59ceb35632b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.335, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013630871843821476}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.3433333333333333, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01371263383046586}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..49f2b1690dc30d74e7152e959923f87f16a7d5c9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.33666666666666667, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013647602942406393}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.33666666666666667, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013647602942406393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..bc44119df898ab95d8eb0413d2d45ac65671a77e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.32666666666666666, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013544340907003665}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.32916666666666666, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013570806258433633}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..74edfd8e142ce4fc4d639f35710a03c3cf55c652 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.33916666666666667, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013672343491681808}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.325, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013526454480351018}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..88e61fa09928a67f7c8dad9d0fe1624b9ac175e0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.3225, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013499258621103249}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.32, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013471620929769139}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..77efb51822ddb0d171d4743a7cda568667959421 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc": 0.3258333333333333, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013535422043417464}, {"task_name": "anli_r3", "prompt_name": "MNLI crowdsource", "acc_norm": 0.31333333333333335, "fixed_answer_choice_list": ["Correct", "Inconclusive", "Incorrect"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "0cc3ae39-3997-4686-8c93-5d51457efa1f", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013395739415639082}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..aee1e1dbfe8f52abcd3df63ff870c8f70c2d78c0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.3475, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013751753243291852}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.3408333333333333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01368860079329693}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..860191f3730854e7b50b65eab410a91aa6c9610f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.3525, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01379716491891836}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.34, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013680495725767797}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e4319a05d14a0923c98e16c13b70b667692ba509 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.335, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013630871843821469}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.33, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013579531277800917}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3244aaf1f66795b5f3b365bbe52b7729454a6e7e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.345, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013728421539454876}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.3375, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013655897185463664}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..aea6011af353831ef336078a07aea5d039f7c996 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.32666666666666666, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013544340907003663}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.31, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013356596331200263}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b44c5d2b45a0532fe19f3ab6775507cf561823da --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_can-we-infer_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "can we infer", "acc": 0.3175, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013443538681348052}, {"task_name": "anli_r3", "prompt_name": "can we infer", "acc_norm": 0.32666666666666666, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "c4ed37ae-d7d7-4197-a725-ef2152fa3b1f", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013544340907003663}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e7171d61e2e70dfb208e86f8b0660f5cf4f21ca8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.33166666666666667, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013596836729485166}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.31416666666666665, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.0134053993149841}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5164f6b82955153d993f8b15e578c7dc89a2cd78 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.33666666666666667, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013647602942406393}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.33666666666666667, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013647602942406393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7c1eae93abd1edca4a6f674419a29e5a0184082b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.32166666666666666, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013490095282989521}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.32, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013471620929769142}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2c592a35e1f6454ef61982b22bd7825362c67fab --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.3416666666666667, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013696658778002515}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.3425, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013704669762934728}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d0262a2530801be5d88dea30581d5ff819bc6731 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.33416666666666667, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01362243481313678}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.3308333333333333, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013588208070708993}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..fe1746eae948ff9c778668734157faa5e916d503 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc": 0.31166666666666665, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013376268790982096}, {"task_name": "anli_r3", "prompt_name": "guaranteed/possible/impossible", "acc_norm": 0.31083333333333335, "fixed_answer_choice_list": ["Guaranteed", "Possible", "Impossible"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "ca24b93a-6265-462f-b140-e329c03d94fa", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {{ answer_choices[label] }}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01336645784596543}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_0.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1c12b2ba4aefc6bb9634c1a04e81c15621a0b45c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.3475, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013751753243291852}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.3408333333333333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013688600793296934}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_1.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..156fc39e320f1b6ccc4b8379f645d76c57d2e521 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.3383333333333333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013664144006618268}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.33, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013579531277800922}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_2.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8c34149d13634acc1cf996145063c7fba062ed6c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.3258333333333333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01353542204341746}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.33, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013579531277800917}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_3.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..313c9c73e7afa2bc6c81c287dc712135ef125b69 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.335, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01363087184382147}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.32916666666666666, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013570806258433625}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_4.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f10a84009e85fa6b3032cb09b4c9507a65d0c180 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.3258333333333333, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013535422043417462}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.3175, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013443538681348052}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_5.json b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e1973e5d44361b2454465e2229ab3bae42e04b6b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_anli_r3_justified-in-saying_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "anli_r3", "prompt_name": "justified in saying", "acc": 0.31333333333333335, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.013395739415639082}, {"task_name": "anli_r3", "prompt_name": "justified in saying", "acc_norm": 0.32166666666666666, "fixed_answer_choice_list": ["Yes", "Maybe", "No"], "dataset_path": "anli", "dataset_name": null, "subset": 3, "prompt_id": "a850110d-f1a3-49b4-949a-d3bfe9f81344", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {{ answer_choices[label] }} ", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013490095282989521}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_0.json new file mode 100644 index 0000000000000000000000000000000000000000..79675b3a737015fbb08c78b21b6fd2e070b50d60 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.22013651877133106, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01210812488346098}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.22013651877133106, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01210812488346098}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0b5de20843259e82daf50653f365de005078df01 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2363481228668942, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01241496052430183}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2363481228668942, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01241496052430183}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_2.json new file mode 100644 index 0000000000000000000000000000000000000000..913299a60e0f4f23a77dd98068ab2c29129709a3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2354948805460751, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012399451855004748}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2354948805460751, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012399451855004748}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2a95a2ca29aeb8b4c98585d62ed66f456432781d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2508532423208191, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012668198621315433}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2508532423208191, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012668198621315433}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_4.json new file mode 100644 index 0000000000000000000000000000000000000000..05203534f96b070d628d74b5eb3f4bfeb9c19064 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2593856655290102, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012808273573927094}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2593856655290102, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012808273573927094}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e21427f18171dfbd47ad17feb0e763ed559c442e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_heres_a_problem_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.24488054607508533, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012566273985131358}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.24488054607508533, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "540ebc31-2ea6-4feb-a6fd-67b6e71cf20a", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012566273985131358}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..94f184003fb701dbfb538a714546bc80a0294f24 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2098976109215017, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011900548748047446}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2619453924914676, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012849054826858117}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e950aa1dcb755e08861bfae23794b6306e77d1be --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2150170648464164, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012005717634133611}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2593856655290102, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012808273573927095}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..76243564e374da886e75833c48207dfdef9160a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2150170648464164, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012005717634133608}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2645051194539249, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012889272949313364}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..ca85be8330787a59b7fb945a79ae0a44f2919e1b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2150170648464164, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012005717634133608}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2645051194539249, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012889272949313364}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4891e6d6c8969c96fcedad673660e7a59d16d3ee --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.22525597269624573, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0122078399954073}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.26791808873720135, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012942030195136425}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b007d7214f724b778c681af8271bbceb5a142440 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.23293515358361774, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012352507042617405}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.257679180887372, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ff84886-9d5f-40d1-80d7-2a39b7c16ec6", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012780770562768414}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a6d5177f98e7ebff63cb9c14166879b9d6e302e1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2235494880546075, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012174896631202612}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.27303754266211605, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.013019332762635734}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..74a4b461c0deb641d5600ee65d5da17d8e496410 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.22098976109215018, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012124929206818258}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.25597269624573377, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012753013241244513}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a9f328dddb94b00b421a9ffec732ce7e45887ed3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.20733788395904437, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011846905782971385}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.24914675767918087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012639407111926432}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..04421aeff114585b6677ab80704b244cf74a9c2e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.22440273037542663, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012191404938603838}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.25, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012653835621466646}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d0c1ea578f394ace88bb2613d2821224af836011 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.22525597269624573, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012207839995407307}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.25, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012653835621466646}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6b52ca09146762aed4723fb3de80e0ec5c8019c4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_multiple_choice_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2030716723549488, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011755899303705582}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.24658703071672355, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "ced2b33b-b590-4522-b041-51d7dd669561", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01259572626879013}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..b183e4e739a56a314f34cd5169038556a8f4ea68 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.22013651877133106, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012108124883460976}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.22013651877133106, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012108124883460976}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..00589ce484fa53ee3e6766e33bccc80770921ecf --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2354948805460751, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012399451855004746}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2354948805460751, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012399451855004746}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e1e179fe413a1a1e4fa4725a2276eb15ec621170 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.23122866894197952, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012320858834772273}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.23122866894197952, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012320858834772273}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..58e38a97a510046073b2e22f6d35aea5c4eb6ece --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2380546075085324, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012445770028026206}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2380546075085324, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012445770028026206}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7b1adffb36e2d31cad416da7dfb0c324026ce459 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.24914675767918087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012639407111926435}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.24914675767918087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012639407111926435}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6cebeb5440f9312893deb5802466401b9cbe4783 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2440273037542662, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012551447627856255}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2440273037542662, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "e371fc1a-8edb-477b-b345-9d73e97ffade", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012551447627856255}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..cebfb1f167baa1fffe3e1b87d3f1f97f8e7c21f7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.20477815699658702, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0117925443385134}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2619453924914676, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012849054826858117}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..3c033626f7019c1279a66e457d64830f732cab1d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.2098976109215017, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011900548748047452}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2525597269624573, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01269672898020771}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7fda207a2012bb0eb8550aa4618608b2719e8b9d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.21160409556313994, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011935916358632863}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2636518771331058, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012875929151297056}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d1e2130783fccced468cba06cb439d7a2c6e1423 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.2090443686006826, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011882746987406448}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.26706484641638223, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01292893319649635}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..537f3bcf069414480ed567370a5bfeacea0252d2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.22866894197952217, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012272853582540807}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.27047781569965873, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.012980954547659556}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a22643dafa16b2a1ab95cf063a2c937d587f88fa --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_challenge_qa_options_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.22098976109215018, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.012124929206818258}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2627986348122867, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "subset": null, "prompt_id": "5ec2b8ca-e4c0-444e-b097-89ccce811550", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01286252317535133}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e3e67c7d68ba9156402391dd418f3c32f755321d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.25673400673400676, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008963590834042407}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.25673400673400676, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008963590834042407}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_1.json new file mode 100644 index 0000000000000000000000000000000000000000..55573a459abcd8c8c54dcc38e4514a51ef3da580 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2478956228956229, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008860162361464028}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2478956228956229, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008860162361464028}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2f11c43a37042306febf8d302c178ea78e770ac3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.26136363636363635, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00901583836660821}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.26136363636363635, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00901583836660821}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e10486598ac6ee60d33638894f5ee180bd80ce8c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2588383838383838, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008987501845758047}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2588383838383838, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008987501845758047}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_4.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc3a3ae0bf6ea2679c60c775be3a88b2c12100e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.25084175084175087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008895183010487386}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.25084175084175087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008895183010487386}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a4ef90e61822260dba0a43e1bc85a5a78126bff8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_heres_a_problem_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc": 0.2622053872053872, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009025197991724831}, {"task_name": "arc_easy", "prompt_name": "heres_a_problem", "acc_norm": 0.2622053872053872, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "d90da519-0e2c-4f9b-a546-7cba82824eb2", "prompt_jinja": "Here's a problem to solve: {{question}}\n\nAmong the 4 following options, which is the correct answer?\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n {% endfor %}|||{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009025197991724831}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0ec6b071f38f3d50773026299bc0767fef20ba50 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.3021885521885522, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009422719042483188}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2866161616161616, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009278551100969298}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..fa608c239eeec1f6d2b944ac8c44988e1bd2008f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2828282828282828, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009241472775328228}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2840909090909091, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009253921261885763}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fdf01454f40fddaa165f81e277f54f8a6d3933a0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.2824074074074074, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009237303403479327}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2765151515151515, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00917788010146828}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..ce6809d3865ea6bd68a4a92bddcccb8b59101f6a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.27735690235690236, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009186490105111902}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.27735690235690236, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009186490105111899}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6f6ba02e60242cae0bf012fa87e6ad9838cdfe41 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.27441077441077444, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009156177122244532}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.2777777777777778, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009190779909649918}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..58ff3ac361f1cc3a77bc7a8524fcbb02a24da8f3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_i_am_hesitating_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc": 0.27441077441077444, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009156177122244532}, {"task_name": "arc_easy", "prompt_name": "i_am_hesitating", "acc_norm": 0.27525252525252525, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "4fb13ac1-f770-45ea-b5d5-91ac50b0d609", "prompt_jinja": "I am hesitating between 4 options to answer the following question, which option should I choose?\nQuestion: {{question}}\nPossibilities:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009164888895174743}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ae522920df0d99d7fe6269a55df1433c582535d7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2895622895622896, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009306838912173902}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.2706228956228956, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009116466166403821}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..30dc4d4a6dfc08cf398502680015b328673ed5ed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2781986531986532, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009195059601583901}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.27525252525252525, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009164888895174743}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f7009d17b4c4609c3b2b02eaba0d43641c0eb30c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2760942760942761, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009173559873835262}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.2718855218855219, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009129795867310489}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..eb583cf443552af35827d7f24885347d181b562b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2697811447811448, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009107527914671064}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.2748316498316498, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009160538115254952}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3f1d65b08057008d4f78ec998c6792acb344a5a2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.26262626262626265, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00902986177676375}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.26052188552188554, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009006435890336588}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6025f0a070831d4b8b5898adb2942cc72f38619f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_multiple_choice_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc": 0.2588383838383838, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00898750184575805}, {"task_name": "arc_easy", "prompt_name": "multiple_choice", "acc_norm": 0.2563131313131313, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "8c689423-880d-402b-8c7d-a1a98c7589e8", "prompt_jinja": "I gave my students this multiple choice question: {{question}}\n\nOnly one answer is correct among these 4 choices:\n- {{answer_choices | join(\"\\n- \")}}\n\nCould you tell me which one is correct?|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008958775997918368}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..93573976930b1c5cde04bae50d3bc429943aa853 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2478956228956229, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008860162361464027}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2478956228956229, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008860162361464027}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9b57521dd33713aeeedfdfa2d7ce8ee31cd4171a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.25, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008885233166386385}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.25, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008885233166386385}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d578a89209c3cba17d64551adfa2909be2b4ee --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.26641414141414144, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009071357971078681}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.26641414141414144, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009071357971078681}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e77c9ff34e57b0943561b809fd6fb166d5d32895 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2588383838383838, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008987501845758049}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2588383838383838, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008987501845758049}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..36a5ddf80bb199cc53ed247f9c818c86a0180186 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.25084175084175087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008895183010487386}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.25084175084175087, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008895183010487386}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8de272756c779ffc9d62a0e6463e75da5fdbbfac --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc": 0.2537878787878788, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008929657065808293}, {"task_name": "arc_easy", "prompt_name": "pick_the_most_correct_option", "acc_norm": 0.2537878787878788, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "033498ca-3d9a-47e3-b631-d881ab53b5ad", "prompt_jinja": "Pick the most correct option to answer the following question.\n\n{{question}}\n\nOptions:\n{% for letter, t in zip(answer_choices, choices.text) %}\n- {{letter}}: {{t}}\n{% endfor %} |||\n{{answerKey}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008929657065808293}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_0.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..69951c5ff3350a3aa564b95cf376db5e05adbde7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.30092592592592593, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009411516193787188}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2845117845117845, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009258050925618823}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_1.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..79e9bbfa4675ec4f11b9e8e95593ed353f48c551 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.29419191919191917, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009350328648861737}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.28324915824915825, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009245632200075453}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_2.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c35b196aa9a570309e5d0cbd9d8eaff4e1e00713 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.289983164983165, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009310840970769035}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2777777777777778, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009190779909649923}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_3.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..55cc13e86a90cae8be6218af4ca424dfc97d4c22 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.28619528619528617, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009274470774627732}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2798821548821549, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009212077524656529}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_4.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..579975e900fb03dafbd6b80db8c50cb4fc04501f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.2828282828282828, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009241472775328228}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2777777777777778, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009190779909649916}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_5.json b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..de5e926d3af8144fffab9bb4fc94bd32d4578791 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_arc_easy_qa_options_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "arc_easy", "prompt_name": "qa_options", "acc": 0.2760942760942761, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009173559873835264}, {"task_name": "arc_easy", "prompt_name": "qa_options", "acc_norm": 0.2786195286195286, "fixed_answer_choice_list": null, "dataset_path": "ai2_arc", "dataset_name": "ARC-Easy", "subset": null, "prompt_id": "252aa566-9482-4e81-aad9-664a9bebd8e8", "prompt_jinja": "{{question}}\n\nOptions:\n- {{answer_choices | join(\"\\n- \")}}|||\n{{answer_choices[choices[\"label\"].index(answerKey)]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00919932919502635}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c74df91f5a39de9d0cccac46d1918a10b8f2715e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.5703333333333334, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009039449353930876}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.6243333333333333, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008843442555522142}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a3a41e2008948fcea59ec44a342ec4a8e92ebc13 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.536, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009106534814375936}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.576, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009024142344197916}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fd0c46876726aca3dfbf83496ab14ebf764d034b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.5546666666666666, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009075496684215473}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.5843333333333334, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00899942365775079}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3683b0dd44a8eb85baa19f2554814163b62cdbd0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.5446666666666666, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009093726495969151}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.5836666666666667, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009001498317147612}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..8ed9c327b55c97efa89a74ffc875c5c65d09a5f0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.551, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009082611478924378}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.58, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009012606487132153}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cf426fb3cb6be0dc178806fd374d1ded5304c8e1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_GPT-3-Style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc": 0.5473333333333333, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009089227499483243}, {"task_name": "boolq", "prompt_name": "GPT-3 Style", "acc_norm": 0.5796666666666667, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "492f0f88-4370-46cd-839b-1de37a55aeda", "prompt_jinja": "{{ passage }} \nQuestion: {{ question }}\nAnswer: ||| \n{% if label != -1 %}\n{{ answer_choices[label] }}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009013590979636827}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_0.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_0.json new file mode 100644 index 0000000000000000000000000000000000000000..83c86db2c91e339c8c4b0ac536fbb04d5cebfc80 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.6236666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008846558976258922}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.5433333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009095877403306734}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_1.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a4f3069495c4fe83fd5dbd76b282d8cf9e9ddad3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.575, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00902693165837962}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.5653333333333334, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009051951785603833}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_2.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2c729f1ddc485533700bfc494d9349fd182a359b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.593, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00897090625594853}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.5863333333333334, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008993097934268066}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_3.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6c56dbc54781eb03e191021539e4a482f55ee8d2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.5793333333333334, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00901457125468042}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.571, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00903771136639389}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_4.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ac357b86e1ac0e127353abbd575190e5e9fe3942 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.5696666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009041170615977855}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.5556666666666666, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009073469569248527}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_5.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_5.json new file mode 100644 index 0000000000000000000000000000000000000000..397e3e93f28fab199c8e8c54b47ff4c29c341c28 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_after_reading_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "after_reading", "acc": 0.5663333333333334, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009049526374650807}, {"task_name": "boolq", "prompt_name": "after_reading", "acc_norm": 0.5553333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "3e386463-1715-4578-9cba-07d11a0d3b61", "prompt_jinja": "Passage: {{passage}}\n\nAfter reading this passage, I have a question: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009074149407514857}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_0.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_0.json new file mode 100644 index 0000000000000000000000000000000000000000..fc64149a167b7dc7df0b797f5fd2476d1ca6106e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.6233333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.00884811049411477}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.38233333333333336, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008873806022763182}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_1.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2ab6f8dabc151781a2c4f90759a80703933ee6fc --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.5423333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009097447488896774}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.541, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009099483512819305}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_2.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f79ecb6b57ef8eccded3c6e85b0482f026ca19b6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.5223333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009121118663627244}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.5156666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009125748094153247}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_3.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_3.json new file mode 100644 index 0000000000000000000000000000000000000000..aa5751c13ae381bb96bc12df83e0f5967263d2ae --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.5453333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009092626403553739}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.5353333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009107405418833937}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_4.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b892004728d194a693f0c02da48b0cef2a8e0321 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.5423333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009097447488896775}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.5323333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009111120825274597}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_5.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5f42e11bbd2169ff9bdcdd1857dedd2a176d8f44 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_exercise_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "exercise", "acc": 0.543, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009096404868252823}, {"task_name": "boolq", "prompt_name": "exercise", "acc_norm": 0.5323333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "9f4c6b0a-437b-40c0-b467-db4b7218d38d", "prompt_jinja": "Exercise: read the text and answer the question by True or False.\n\nText: {{passage}}\nQuestion: {{question}}? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009111120825274599}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_0.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6fdfc68486c5aab41beaaa899064e689d0c1648e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.61, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008906539291295217}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.4523333333333333, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009088646624339617}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_1.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c97f0a121b9558435017122666d90b1de289a46c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.5586666666666666, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009067164858263418}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.5546666666666666, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009075496684215473}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_2.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..99205672074564c6ecccb8b8994ee1a02bc6d32b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.5856666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008995223478188036}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.5746666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009027853030468718}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_3.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..730a201fea4e33273ec6a8ad12ea516aeac50123 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.5883333333333334, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008986619341172333}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.573, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009032396953831094}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_4.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1237b800474513ab738c11335ecc18e8eeb6b9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.581, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009009627690997004}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.5746666666666667, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009027853030468722}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_5.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..01f4461644e5f5415cc0d31fb7b8959ccfe7fe69 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_valid_binary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "valid_binary", "acc": 0.582, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009006610887558775}, {"task_name": "boolq", "prompt_name": "valid_binary", "acc_norm": 0.575, "fixed_answer_choice_list": ["False", "True"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "eb78772c-e81e-4b8a-a77b-b75efd1c212a", "prompt_jinja": "{{passage}}\n\nQ: {{question}}? True or False? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.00902693165837962}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_0.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_0.json new file mode 100644 index 0000000000000000000000000000000000000000..52e25e8213dee291cbda45e397093420c4fb92d9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.624, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.008845002997512754}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.6236666666666667, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008846558976258922}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_1.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ae0d0d31a43e6e0e6127ac20c997388a98064ddd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.5476666666666666, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009088646624339617}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.57, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.009040312075041282}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_2.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7adfe1614cdd8af5c4cc9a17277f7c0195861942 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.579, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009015547313638383}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.5933333333333334, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008969751860881005}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_3.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1484d0fff4bda1d75cc9df02b8a5f1e65d63bc --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.5733333333333334, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009031496556538196}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.597, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008956770693005616}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_4.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3a0996db4f4c5f063ae140cd466d81b92ae0fb90 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.575, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009026931658379624}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.5986666666666667, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008950698369218387}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_5.json b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ce02c9c174cd70627b39b81739aae2adc6c37027 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_boolq_yes_no_question_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "boolq", "prompt_name": "yes_no_question", "acc": 0.5706666666666667, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.009038582451449428}, {"task_name": "boolq", "prompt_name": "yes_no_question", "acc_norm": 0.5973333333333334, "fixed_answer_choice_list": ["No", "Yes"], "dataset_path": "super_glue", "dataset_name": "boolq", "subset": null, "prompt_id": "7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5", "prompt_jinja": "Text: {{passage}}\n\nAnswer the following yes/no question: {{question}}? Yes or no? |||\n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.008955564831687461}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6a9bea9038b385e36ca3f310e8171480db3f1352 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.4107142857142857, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06633634150359538}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.1940928270042194, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7863dd4ca912a895adc552c1288c10b67cd2f90d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.27686156921539234, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4ac0c8451ea4ea8580d6a9b71d1a9ecac4e492ee --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.44642857142857145, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06703189227942398}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.2956393200295639, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..16826194e834b23521e71c80b5d20b0325e42f1d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.2596899224806202, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..39bba47c99db577e851ed7672617b62eefea02d3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.375, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06527912098338669}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.23582089552238802, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9187e7ccc2251b10c6f4ac59b2eb30c9f02e64ec --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_GPT-3-style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "GPT-3 style", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "GPT-3 style", "f1": 0.235006435006435, "fixed_answer_choice_list": ["True", "False", "Neither"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "75db2bc2-3caa-4956-9653-13c7dd6255df", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_0.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f332c269d45329b2164ed51cd6b13e148a144923 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.4107142857142857, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06633634150359538}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.1940928270042194, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_1.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..98a347a18176c2e5b86a43ee483b123c06735fd3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.2842025699168556, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_2.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..018684a9e43fc49074445dfeb1e4704229940f2f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.4107142857142857, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06633634150359541}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.28245294474802674, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_3.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..44e68ad73a8638052df6e28ca064c158a81164a7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06585388898066351}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.27449576321756775, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_4.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..922b3f95b3d51d12bbbb7b446933f133c150c43d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.4107142857142857, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06633634150359541}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.28319209039548027, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_5.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..54b5e5fa62578ffe476daffcde1cac55600407b8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_MNLI-crowdsource_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "MNLI crowdsource", "acc": 0.35714285714285715, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06460957383809218}, {"task_name": "cb", "prompt_name": "MNLI crowdsource", "f1": 0.24859943977591037, "fixed_answer_choice_list": ["Correct", "Incorrect", "Inconclusive"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "bee62bfa-5307-4e1c-97b2-2ad2f7bcb179", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, \"{{hypothesis}}\" is definitely correct, incorrect, or inconclusive? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_0.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0e56f0a85a027133bc8f364b7dbc46ecefb4178c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.5714285714285714, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06672848092813058}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.3983682983682984, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_1.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9f6aab75d80a92d2fb8cfc14abfc38d0a1c7a9ed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.42857142857142855, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06672848092813058}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.29449152542372886, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_2.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fa9fb1584b07ada0cc4a182527c9ef0901b44a46 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.5, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06741998624632421}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.34717335209946043, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_3.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4d00e8018452a3d9819444047c003dc6ef4369b4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.5, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06741998624632421}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.34491725768321513, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_4.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..1d022649ac4773eaea9098c23fb015c3d4085fb4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.5, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06741998624632421}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.3485060690943043, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_5.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..fa695a5d7a128f82fcc2510459cf969f23bb1c8d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_can-we-infer_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "can we infer", "acc": 0.44642857142857145, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": "", "acc_stderr": 0.067031892279424}, {"task_name": "cb", "prompt_name": "can we infer", "f1": 0.3013431013431014, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "2e76cd0f-68ca-4f03-83ed-11cf15b25a84", "prompt_jinja": "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %} ", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a95bf247371785c5a16f55d3d02529835837ce36 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.14285714285714285, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0471841613625583}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.11815661815661815, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f1f62dfdbca466a22cdd43ac49dac2196c3ed3e9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.2842025699168556, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5c79e81f1a1964504e142c18e324cbc8088a04c0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.375, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06527912098338669}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.2567567567567568, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..dff0d694aeca69f59d8e7eab5ce9ec2123e6fa15 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.35714285714285715, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06460957383809221}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.22072072072072071, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f16d610dc8964c81473eff709ea26aa63fff85b4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.39285714285714285, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.24598511555033295, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..06af399aad5ed2c53846e2941e6c5594e8bdf40c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "acc": 0.375, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06527912098338669}, {"task_name": "cb", "prompt_name": "guaranteed/possible/impossible", "f1": 0.25396825396825395, "fixed_answer_choice_list": ["Guaranteed", "Impossible", "Possible"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "90ab1002-093c-4e54-b48f-626655e36b65", "prompt_jinja": "Assume it is true that {{premise}} \n\nTherefore, \"{{hypothesis}}\" is {{\"guaranteed\"}}, {{\"possible\"}}, or {{\"impossible\"}}? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_0.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..4a0a863438efed88263d07031fcfb9db2d02d502 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.6071428571428571, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0658538889806635}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.4233100233100233, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_1.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..876d45cf14a1470ae093c19a3c664a031059f7b9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.44642857142857145, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06703189227942398}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.31149301825993553, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_2.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2cfd2e386929d8be8cb1dcc180199ff943b7a053 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.48214285714285715, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0673769750864465}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.33543859649122804, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_3.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..45c16a727f93310aa19472e7b029c5dc90f071dc --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.44642857142857145, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.067031892279424}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.3083804143126177, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_4.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..c02900f411d381962ec9a0c0decf2d2aa4c8d8b7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.5, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.06741998624632421}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.34717335209946043, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_5.json b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0f70cf95e8be609c3a9aaa9ba19c05096459ec79 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_cb_justified-in-saying_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "cb", "prompt_name": "justified in saying", "acc": 0.44642857142857145, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.067031892279424}, {"task_name": "cb", "prompt_name": "justified in saying", "f1": 0.295771144278607, "fixed_answer_choice_list": ["Yes", "No", "Maybe"], "dataset_path": "super_glue", "dataset_name": "cb", "subset": null, "prompt_id": "5c9b1fa9-93f0-4f82-b9e3-e0967e4d7260", "prompt_jinja": "{{premise}} Are we justified in saying that \"{{hypothesis}}\"? Yes, no, or maybe? ||| {% if label !=-1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": ""}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_0.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3acc52f327fd9742a5aa79a00820d6ebde1f50d0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.58, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049604496374885836}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_1.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..690b99331f07b68a11e88a1b7d0e303901cc17f8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.57, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049756985195624284}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.54, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05009082659620332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_2.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..436f986bcddfd19b1343fb6dd75e7d908ffc555d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.56, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.04988876515698589}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.51, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_3.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..846d1c4ebd9f83b184db242e075a86a335088cf9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.63, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.04852365870939099}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.54, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05009082659620332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_4.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..65ccb42a30d0f71e0d1ac75b75d90f447521177f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.59, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049431107042371025}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050161355804659205}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_5.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e971a5f4500e891769b429e2574ae256e4d78c71 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_best_option_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "best_option", "acc": 0.58, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049604496374885836}, {"task_name": "copa", "prompt_name": "best_option", "acc_norm": 0.56, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "8ce80f8a-239e-4393-892c-f63dbb0d9929", "prompt_jinja": "{{ premise }} \n\nWhat's the best option?\n- {{choice1}}\n- {{choice2}}\n\nWe are looking for {% if question == \"cause\" %} a cause {% else %} an effect {% endif %}\n||| {% if label != -1 %}{{answer_choices[label]}}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.04988876515698589}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_0.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_0.json new file mode 100644 index 0000000000000000000000000000000000000000..fe4eed0b45d10ad2f25d1b3c2db43cfcc2f027df --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.57, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049756985195624284}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_1.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2cd78891b86a8dfd40bae5a04b54925b9d19b1c0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.57, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.04975698519562428}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.51, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_2.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e8bc7ceaf319bd302340a02c538a233eb6cb4a22 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05016135580465919}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_3.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7dae9ea2c2c9f513d53ef1f041866b4ef6d5b58f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05016135580465919}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.51, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956911}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_4.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7e57a7f000a115ee52796fd80163842aba424937 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.52, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_5.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_5.json new file mode 100644 index 0000000000000000000000000000000000000000..536ed53ec35f1042a7fcd4b9e01cf8bab2d4690f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_cause_effect_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "cause_effect", "acc": 0.51, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05024183937956911}, {"task_name": "copa", "prompt_name": "cause_effect", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "a61d8c21-da25-47bf-b5fe-14a8edd650af", "prompt_jinja": "{{ premise }}\n\nSelect the most plausible {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_0.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_0.json new file mode 100644 index 0000000000000000000000000000000000000000..779f4df36c69420f5dd8ab53b5b462960b81b856 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.6, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.04923659639173309}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.47, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_1.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9cfe0706a39275948fd61e69861baf7d2705086a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.54, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05009082659620333}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956911}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_2.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c84fc9dca54466d9279e7e92c168752b075c23b8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05016135580465919}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.51, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956911}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_3.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f37a5556490748e830661c110a340622734be753 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_4.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d3c6ec1e9217ff03231d095c1cabf2967fb7ae63 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.5, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050251890762960605}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.45, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.04999999999999999}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_5.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_5.json new file mode 100644 index 0000000000000000000000000000000000000000..855150f8b9879b0c94af2336ee82816ffccd6f0c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_choose_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "choose", "acc": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "choose", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "f32348cd-d3cb-4619-87b9-e24f99c78567", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} because... {% else %} so... {% endif %}\nChoose between:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_0.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..38b240fb79dfa23ca7063dcba6ed42ca1bcc7d6f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.61, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049020713000019756}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.54, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05009082659620332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_1.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..11f99773801beb16a4cfe446010d4b19a6344fa4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.55, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.5, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050251890762960605}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_2.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a6ab20848cf47dba508dd5c9e20e257ccd0ffdc6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.52, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_3.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..67b6b6d61105b4c5a7af7c8478d7b98935fb3500 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.52, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.46, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05009082659620332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_4.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..207a2178318d687962736751ac2857bb46a00a1d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.5, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050251890762960605}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.47, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_5.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..064a459e1e224fc175d96d9de0d99ffebbc30443 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_i_am_hesitating_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "i_am_hesitating", "acc": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05024183937956912}, {"task_name": "copa", "prompt_name": "i_am_hesitating", "acc_norm": 0.47, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "4d879cbe-2fd7-424a-9d78-3f5200313fba", "prompt_jinja": "{{ premise }} \n\nI am hesitating between two options. Help me choose the more likely {% if question == \"cause\" %} cause: {% else %} effect: {% endif %}\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_0.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d38bdc3bd16bcd0a29c2ae17f2a3bdc42293461b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.59, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.049431107042371025}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.54, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05009082659620332}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_1.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0ffa0a3fdd20b855898f54d613fce4392251208b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.53, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05016135580465919}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05024183937956912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_2.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e0538596488c250abb11040137e73cb054e0a07e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05024183937956912}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.47, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_3.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2f9ae61343ae9db8c271a6caf25efa3b0f621994 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.49, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05024183937956912}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.050211673156867795}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_4.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3d1ed0c81bdc5b9f251b8684070aa403e4d3d120 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.48, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.050211673156867795}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.45, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.04999999999999999}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_5.json b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9e5bb508ab7876123670408cb5d7ca9d94efdb0e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_copa_plausible_alternatives_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "copa", "prompt_name": "plausible_alternatives", "acc": 0.46, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.05009082659620332}, {"task_name": "copa", "prompt_name": "plausible_alternatives", "acc_norm": 0.47, "fixed_answer_choice_list": null, "dataset_path": "super_glue", "dataset_name": "copa", "subset": null, "prompt_id": "66ea075e-4d03-4a78-b1fa-9a5228cf0c9d", "prompt_jinja": "{{ premise }} {% if question == \"cause\" %} This happened because... {% else %} As a consequence... {% endif %}\nHelp me pick the more plausible option:\n- {{choice1}}\n- {{choice2}} ||| {% if label != -1 %}{{ answer_choices[label] }}{%endif%}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.05016135580465919}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..76c8167c4794b669fed0ea6bf86cb1a74957fcc6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 2.7137587007909403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04289535874730102}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.23420804927860228, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029343193986469574}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.368972333302577, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0033242062731447717}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.27875544176267436, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0030398850526226597}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.08419089263107597, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014122078430193529}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.1301132540842083, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019763854755703455}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.0995456556042845, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001576544642160748}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.18337042886990745, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018758190997753684}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.3011863301059313, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002348188787579532}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.22171841937262945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019873580895336043}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.19284679447481895, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023339033454095755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.30684388196998574, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002638945933468923}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.23024990094522646, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024068830580772417}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..62df8b6be18d9b4a5d62545bca3cdcb73a7fe01d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 4.776883166568447, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06188581593802797}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.32376895607264333, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003559481233250432}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.2766644901218466, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003006971082101755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.2834915331439006, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00286004072375514}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.11422580842443067, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001981771261667616}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.09502385216159488, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016087538033057863}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.09798530798331949, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001591303023518874}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.23647808916387403, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002785237759844657}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.20111037531455733, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023137520458429714}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.20604394935503653, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021945968399171094}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.26668028425156176, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031249451907287393}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.22678921457291987, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026010547211686662}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.23271534495205093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002496962503511146}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ef4d53a4c6b008b665514344d30c27005859e5b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 6.367232170877392, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08549395921417903}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.3773758975465212, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033199301919147046}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.32188781676797334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002721512158082673}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.3289239334300976, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002499918188495706}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.14546628155822858, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002076024275324282}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.12068762525775042, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016561296699671543}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.12398725975487611, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016074904321220344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.2810377453951778, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027020897811990126}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.23833759611575786, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021523119739722788}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.24355724501015555, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019784432789600953}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.3138640600673944, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029811254616978994}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.26690385060781147, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024253237610957243}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.2728731326289227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002250126526299403}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b2073af9ce3dd19205d7681b620ff5743f6957b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 7.254477332274835, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11215533468282095}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.4071643250852113, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030918195348976214}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.3384520664356995, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025658424963617885}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.3494166954536371, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022556130694930294}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.16367687467995234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021599397295835646}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.1321193097997345, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001688691833299027}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.1372251409323741, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016327922809273173}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.30714914534792875, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002654116948033462}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.2531238455203022, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002079981518486239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.2616315622148955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018615767022711104}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.34078206920893017, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002868278542304959}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.2818084570295315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023069075160404608}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.29126143372342, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020762541949325723}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a3c02dc8904ed02b68372ae51722f21c7749d8b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 7.513333905948323, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14766226863484086}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.41870474769317223, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030519614549617937}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.3420315436967001, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024879203984106034}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.3555881239462119, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021502692030659297}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.17051356574406315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002206652366301817}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.1358976585047218, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017083541046320008}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.14162675325545648, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016441659153520706}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.3174787632627764, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0026395803173011943}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.2572488981524024, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002020155375464557}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.2677207737523752, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017913031037284717}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.3514161137133654, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0028576511858666056}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.2859476044645928, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022615077541335332}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.29745641696384, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020148180914382}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..97820da58986e6b836fb57b35459c008c1251193 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "bleu": 7.595896684409467, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11394951713016016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_precision": 0.4267054087992628, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003104273111695704}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_recall": 0.33916614278424423, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002448118122778936}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge1_fmeasure": 0.35658924039746537, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002154416981057853}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_precision": 0.17671699676015093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0022460226448767994}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_recall": 0.13693271552714417, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017033794821993003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rouge2_fmeasure": 0.14450895819387546, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016668350972141967}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_precision": 0.3255781091740331, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002685842607303053}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_recall": 0.2571008116825621, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020239439722082807}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeL_fmeasure": 0.27045280819438955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018187716322355006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_precision": 0.35879686272429967, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002902004869955475}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_recall": 0.2844216790439025, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022327718696866835}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "coherent_text", "rougeLsum_fmeasure": 0.29917487251322744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "bdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Please generate a restaurant description from the information given below:\n\n{{meaning_representation}} ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002029876079328482}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json new file mode 100644 index 0000000000000000000000000000000000000000..47ab2b981c17b720dfacfd6639d78e783de4add4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 1.5543129195281387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.053154855082351425}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.16696716992161004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0025805192223558792}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.2572825872031971, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004212164493341407}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.19711096006674347, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003040616043574641}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.04560883470771521, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012021308966618743}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.0769178100374361, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002128049838328794}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.05566703607474666, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001468465064769846}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.11835106119201162, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014461966600140647}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.1833920610587413, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026168954114977718}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.13981728919468525, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00174661286699012}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.13701764629359647, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019913632568671976}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.21021220612205108, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033048732121641264}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.1613532463046808, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023500023254129318}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4a99ec79b3919259968804c7fea77e424fb08e96 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 4.297512923603484, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07413629268869817}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.3020908787512572, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0036921538937666133}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.25987043503346535, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003141567879746835}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.26520071914836685, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003003658166661617}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.10465034015071716, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019691347076890445}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.08719562064392963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015853267824143741}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.08973585628096593, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015772173702039572}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.2204806269646829, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0028685459881547795}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.18825101434769329, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002377652096200304}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.1923500412945955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002278192833793113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.24946111667640178, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032258053795631643}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.2131990405760118, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026891365363325335}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.21805867415782276, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002597589024739917}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json new file mode 100644 index 0000000000000000000000000000000000000000..dacbf5ca1cddf313dd60019989f2acfa746a77ef --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 5.867162895941705, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08645931541468421}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.36350270495948706, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003355398168023335}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.30887853999055825, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002783892987479659}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.31591363153481733, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002562871100672284}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.13698348670528768, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0020426287273337175}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.1136012858172398, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016553465039351092}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.11659264308512794, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015973372961016513}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.27091169751519845, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027121576953348142}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.2289429812438254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021934381495222347}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.2340979483687512, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002014832984376413}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.30254398731438875, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003008055599147345}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.256276398824361, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024731203037475064}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.26219793663870944, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022935285784009335}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json new file mode 100644 index 0000000000000000000000000000000000000000..da3570510ac927b567cafbe90d8095ad77e4d71a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 6.900082747494956, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11320138722526062}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.40185459499224246, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031344547603992636}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.3286363837625974, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002532026641586088}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.3408847326310388, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002233633388817911}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.16009806566984877, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021974508251131177}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.12657236948027373, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016617454893539316}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.13197675444985899, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001607348415179031}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.3052474939342758, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027064485265094005}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.2472552075503705, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020502376164631794}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.2567441918347381, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018394022581844677}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.3376547544199217, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002925952642220336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.27426355732116464, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002262112871842013}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.2849098056411087, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020490356453933285}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json new file mode 100644 index 0000000000000000000000000000000000000000..df540f7d74f6efe9670f0ac663530da9c629e548 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 7.320031747652725, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12923534694654085}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.41515066909927334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031037936370394118}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.33321944407915566, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00246664110665828}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.34908186170828154, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002179669059327166}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.17014937408764044, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0022566453563309186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.13226023424874878, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016784568847184593}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.13929100917815865, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001648417738388695}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.3170205721100898, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002708866680963841}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.25219110868882494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020045842681778495}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.26448509530378633, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018168804060454762}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.34988097982765304, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029179543208130904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.27907105999460174, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002218854839510192}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.2927886602264322, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020229110972793574}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8a58a0a85282f0b2a2962a9721d0e7a7ba516f9a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "bleu": 7.579729216459941, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10379442969910549}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_precision": 0.4245878302927168, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030985552342163206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_recall": 0.3351370843643141, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024378280020133556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge1_fmeasure": 0.3538095251081614, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021545871849702673}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_precision": 0.17516980829315892, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0022649531690162985}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_recall": 0.13470386962627315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001692225627937733}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rouge2_fmeasure": 0.14278771616071478, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016729015640305864}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_precision": 0.32504959215644774, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002692604155146814}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_recall": 0.2547905097073822, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002009386966136313}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeL_fmeasure": 0.26921100421071603, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018239111737229203}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_precision": 0.3579093459662674, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029111349234790906}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_recall": 0.2817445669200069, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002227797407847849}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "create_text_for_me", "rougeLsum_fmeasure": 0.2975768458732384, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "711bcf63-be82-4937-bdef-0c379d20bb74", "prompt_jinja": "How would we create an appropriate text out of the following data?\n{{meaning_representation}}\n\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002040211020017694}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..21b8d77713179ddcbf4364a1e9aca2b32ffa23b3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 3.357880740552639, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05911525500616559}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.15817916976766455, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011411953748066586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.1449753921655427, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0013358740390814108}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.14809698368434102, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0011506472021475076}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.10846978571330382, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009492436851693913}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.0996008655703363, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010665340200279952}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.10146880175157826, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009392019833054901}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.15815833643433122, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011396842377164138}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.14495687364702417, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0013348745641402115}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.14807737584120376, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011493153027645868}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.15601081480161114, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001103835539038039}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.14288832426456227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0012946708510962023}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.14601036905242729, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011113659770113293}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..737871d42707efe4bbb946db34662bd7b0bf6781 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 4.2427683279460675, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06477494709625758}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.21918225259331264, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00210014169544477}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.22082556498109132, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024742493122335047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.21181656379511446, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020085926283200336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.1163871008313897, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001129914509168705}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.11332262657648863, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0012724108998245058}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.11107876311211701, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001083915673794437}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.19106349917495133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014505997962077457}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.1894466427618812, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017589499235787709}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.18333822193791077, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013426559404332522}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.19958112129901742, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017237243835404959}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.1990541967175752, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002038778096964972}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.19201664569930915, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016232028917454438}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..809e1b102216de27ed2aa8ad4816636b75eb5395 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 5.051230966031554, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13591434741266697}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.257733606387396, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002854189812923714}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.2484296444250448, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028744090194746633}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.24111306272957858, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024738591515231123}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.12122028894875721, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001566218681260604}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.11563987732519745, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015422732429049785}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.11330107614177073, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013577176314569378}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.21371701405030952, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020806238798557787}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.20335467777797223, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020284517357400562}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.19848666892850747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016655731215332195}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.22672124933887802, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002428427353019781}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.21696666131837813, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002404135899742133}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.21119700236219294, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002040241369573437}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..530f5acbba48616d117f3e6647c46f55ae8a49d2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 5.994293739144544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13601894534552475}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.2862040020667472, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003238321483446096}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.26830082610544254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003078316721189308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.2629515004088073, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002731946803977037}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.12802276569983448, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0018589047775403413}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.11943444822480005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017609621427282169}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.11772014214137563, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016007488021205615}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.23037063366617458, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002402866496678994}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.21411130784418406, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002232311335363655}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.2105240941617669, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019150789004942635}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.24519320098691047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0027928419904608157}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.22888580022427765, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002615628628359519}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.22458434070337602, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00230510570165974}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f69e1c57208357139470aee08c83948e7c4a3245 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 6.295897046683775, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11584821440017962}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.3075285202414195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0033945393939165606}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.2801004454252903, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031191985284490546}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.2770259703392387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00277806824406267}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.13304703967240197, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0020060838809736344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.12206189669119812, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018573319105401854}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.1204704109621881, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016880105990733003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.244640019205779, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0025913691710492746}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.2210765996441547, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002310185867597945}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.21909517879981463, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019961120480864922}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.26210204661833736, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029901819656978864}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.23756462084158286, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002680022085644319}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.23521486729488383, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002390395310603108}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f950a8bc2a3d771251196f7e77cd07657f2c30f4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "bleu": 6.703087166294118, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13595717497233803}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_precision": 0.32943509738961013, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0035748420223814196}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_recall": 0.2922771657368818, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031239846598688573}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge1_fmeasure": 0.29157203264366427, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028155194363036675}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_precision": 0.14104223605836952, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00213159571945594}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_recall": 0.12618593841822234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018876455590173392}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rouge2_fmeasure": 0.12548313742099432, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017504707119138294}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_precision": 0.2584532209858507, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002742447042465613}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_recall": 0.22794142525227126, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0023233899066376004}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeL_fmeasure": 0.22747557648943484, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002019174394965523}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_precision": 0.2777892174460894, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031494031496789106}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_recall": 0.24548258109140436, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002697258051680226}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_gramatically_correct_text", "rougeLsum_fmeasure": 0.24501087162389928, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "0f54b6e2-42c0-45ec-8ea2-2e6204388f76", "prompt_jinja": "Combine all of the following data into a concise and grammatically correct text:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\n||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002431781683361551}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a3c04a5834bdc719ad61cb836f93bdf85dc7bfb5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.034167755941272124, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.007307628734226665}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.19301807890145306, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001670641964229549}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.1302779588596268, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0009550048156890856}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.1471748484883756, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0009958043221996186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.0024668841759750483, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00021558605140782248}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.0024274910482947904, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00020167632183355254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0022741562354756397, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0001859749506718914}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.1613842777209177, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012980656999503429}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.10906527422415299, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007641988419152662}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.12313336484112318, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007638110810765412}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.1737032149967068, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015574347646916537}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.11681049061765152, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008822320800065422}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1321175572520423, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009257711780790996}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..85fbe2c1a4e520255f880d3c345a7f496b7bf13d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 6.480953332643209, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1333798473522777}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.40802697478059297, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029673602294223223}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.3261039583636633, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025124189387771036}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.34215503945412495, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002167322468153194}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.153945929557076, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00209119791686883}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.12079622988004886, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016791741735485287}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12715929154890004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016365489693603613}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3007493740437511, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002487305156158438}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.23850764812452344, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019836094262200904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.25061818922473783, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017641406764575026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3365871813932031, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002735631481473113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.26820407020047227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002246895665921754}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.28167871848928877, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020153231718161705}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ff00c36a6b2aa865027a92b16698f5c0ca3c3d3e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 7.666105297947792, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13136205041430496}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4208785617206442, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030817416724062204}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.3424204393428585, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025248373231055543}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3560820001983814, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021795777882845308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17233048865607528, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002143190552622057}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1383316167107616, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017503067979860562}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.14362551846595048, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001639558679939512}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.31589969893422243, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002592218877775354}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2566141330684389, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020689228041648397}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2663812276115756, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018062222315789584}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3509539248702352, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0028422730753440103}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2852154582143963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023210148627271433}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2965891442412763, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020592217464907307}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..22fe975bf968b237ae039f3285250c9da3f1b3c7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.56058254286114, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15123807567499506}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.43327921520363066, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003097568614374566}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.34769690369124, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002543918652030485}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.36308270867669845, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002191649011082704}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.18895843732454584, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002286888650673502}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.14872848902577038, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018036895683928664}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.155349432200804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016980107119711429}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33501338216238535, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027058006091930545}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.267043780702506, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002094068308249151}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.27893233629167613, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018361143289552099}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3685423655496754, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002930111403674498}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.29466685169867624, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002347617054544211}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.30795704419792747, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020858121305838396}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7894f1d8f3434b829a4009f6ec6726c6af308cb3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.949311991420602, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15855472500531886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4375804506933646, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032055526063119576}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.3472946902285877, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002539437414200631}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3640451950801677, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022138110432792396}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.19419957071290792, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002374016494731067}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.15145528896504676, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001854581130642659}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.15872608958193554, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017627421907090207}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3396411151186514, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027863819229066904}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.26869178202876687, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021284030057315906}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2812482066830368, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018755893236809696}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.37367146023180203, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030307585676874636}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.29617959760674445, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023937351643062293}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.31039320669801307, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002134177354316102}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..94420b6cd62336b0bd2900d7522051aad26ae9d2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 9.131873104913453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1609494007900667}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4394755159466518, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032068250726382916}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.35023402614950333, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025006999002648547}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3666709721169093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021727553535898286}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.19707980839947012, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023944009918063663}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.15380875597263907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018326439588638127}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.16117969333074014, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017520909045254573}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3455815272270704, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0028520716467400477}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2740572130821665, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021232292917631344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2867364556172849, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018811550503636599}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3778806601818275, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030588224104805}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3002582190319247, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023482445311229245}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3144340022223061, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002098469397474867}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_0.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ee983261722d80da8d6849ced1dfba68b8a11f48 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 3.1072073402263745, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.054847709586604974}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.2904834399201357, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003209479431805363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.378959200854369, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0033573634989133632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.3192562818462074, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003103481656865381}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.12493081488647073, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014706683187466658}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.16664906976511587, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001807700709560424}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.13831925575429133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014991664215535973}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.22666655050825654, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021721413918810626}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.30700944872644476, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0027337619577674933}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.2530898959654513, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002233323932714259}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.24444049681203178, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025608900739077897}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.32402288867669254, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028828379788400547}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.27039500281410955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002525774163861117}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_1.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..87c241a7fb2b34b7f91e784250a9da466f8a9e1d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 5.768904743696091, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07694130893367676}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.38598841646744675, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029352063511176927}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.3120167715681631, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002282231208925737}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.32587000405265004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002055881315827058}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.141089766958126, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019905359417947705}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.1103023596204418, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014920441369873467}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.11624053272466801, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001498193127795664}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.28626570995375356, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0024472272782447135}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.23012511993056003, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018339215891965562}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.24041537230720197, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001681675906589535}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.31879426513272485, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002698431810379339}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.25680714373294367, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002060514880618308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.2683840002761712, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019114719552552634}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_2.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..03d06f00c3c43407df2c89fa46400e1f6c392f77 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 6.623187234910589, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09284748268311233}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.40593215484811684, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003106340108516954}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.32006154223100713, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002324542554838695}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.336751496891913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002100952186916202}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.15978879415281969, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021982896081617956}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.12215543191803664, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015773240400394131}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.12917141922678907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001569747986831894}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.3108908870060753, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002663105799005185}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.24343947890677778, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018898553924389825}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.2561811353185624, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017410449710564626}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.34104283897670223, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0028793434150954354}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.2679333081128248, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002109219796922622}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.28203219973113697, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019575989714408007}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_3.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..13da9ac7dda5401ba0614a513e9edaa3afc18e90 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 7.009840500917634, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16312381322545783}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.42255037024132486, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031250780400183006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.3217085619415695, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023676195043913673}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.3430320576782421, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020936920124836156}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.17364077254123408, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002316697656460501}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.12769222375851083, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016388892071143308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.13699349439736094, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016218763137671342}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.3285519495441955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002763683063352353}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.2480739442674065, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019648687514971755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.2647497003936544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017804230943033162}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.35718768502576903, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00295212627343816}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.2706627854942478, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021634238449362055}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.28884517699797746, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019739366315454183}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_4.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3afbcf1653d7e59c57c8257dc5b2c206964cc6fb --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 7.107067863801302, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16706105397816348}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.42460043468036757, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031591950135965727}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.3214789344478919, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002360243252337688}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.3436390706497596, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002120851612678163}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.17482332505179643, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002374641561754068}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.12745182984114253, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016392080775069744}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.1374084834705204, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016581364245938277}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.32947339330122327, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00275863160551055}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.24728616508687778, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019282863931353905}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.2647579237213868, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017899579331400889}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.3590563916220248, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029587623344615264}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.2708954741926685, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002158968315143282}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.2897245093598933, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001995382121885309}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_5.json b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..bcec0339928c6a041ea8b71e9ac69f56ca04f64e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_e2e_nlg_cleaned_text_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "bleu": 7.274383164727865, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11122428262789567}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_precision": 0.43124377131693664, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003211030428742271}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_recall": 0.3239350199889235, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002364971988949829}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge1_fmeasure": 0.3469575519675918, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021034752646403756}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_precision": 0.17902835655762828, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002400473530717413}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_recall": 0.1292480377318174, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016477268883042123}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rouge2_fmeasure": 0.13963971276485945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016673499985951031}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_precision": 0.3361429661493787, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002859024482019008}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_recall": 0.250039081212901, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019599039144951795}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeL_fmeasure": 0.26834384059308486, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018122635157761808}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_precision": 0.36599409686858575, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003031621767071787}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_recall": 0.27374374119186806, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021570988272421876}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "text", "rougeLsum_fmeasure": 0.29352695616349106, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "cdecbb5a-d3e8-46f3-9ea8-22025bc59e3b", "prompt_jinja": "Information: {{meaning_representation}}\\nDescription: ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019956203711713662}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ef1f5005abbd6988b1e7cb5ca089671fd8bda9a8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.08690252895592265, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014696237802313788}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.21076760136855094, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0033990058849462626}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.12154953382545712, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019861476396373216}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.011835058486175484, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006782998894859348}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.03014924646277289, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016485838218151638}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.01678428011344468, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009370052903803109}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.07649508725038684, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012036928723305662}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.18622709296396378, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028294388338103195}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.1070578662991953, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016219964231111727}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.06936681100471268, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011946703655621098}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.17032808205023134, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002899791797120825}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.0973663233955277, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016371882205093247}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.6380986430759009, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09497300417280843}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8e0f2c203535955bd1e8de55aa5a1356bf6b0ee8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.11321015578844655, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018833111961485554}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.18538730900428743, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037105443176058024}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.13006038778542497, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002066345113965779}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.008030799491936448, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005974414949521437}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.01630658025640146, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013333260688818906}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.010073796253903308, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007521556541790673}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.0852182314237414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014284391091660574}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.13603732951225597, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025190738181756775}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.09662643610919455, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001430668974086877}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.09002789354250691, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014892889301457962}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.14770301407061567, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002942468251426108}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.1033309158326748, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00161329986842399}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.4956718515669784, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06144134252581695}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b801c5c45cf9d0d26696cce261c6e8f2cd509d4e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.115303289612093, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019631897374271976}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.1867727451057763, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00361199337825402}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.1321353013501063, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020733569592054284}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.009371984198219677, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006185339230947803}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.018833588882257533, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013508882388750508}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.011705506769985538, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007650466477809864}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.08671980544288431, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014435281648377727}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.13855429960506976, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002496409762111219}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.098676375925058, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014409776012497158}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.0903490116138123, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014955778687851201}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.14710100552982236, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002836465397213484}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.10364656157003836, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015811434930502115}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.4649248578493213, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0713213099450011}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json new file mode 100644 index 0000000000000000000000000000000000000000..13ea9b728e9201c88859d95a0d8eb270a061cc07 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.11030819972819507, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002126312863119683}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.18142324171289778, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003822441819750713}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.12726617865153672, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023059251743363816}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.00966434257562582, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006608656525792718}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.019946294126745322, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013598720985991016}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.012326768637832572, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008208778930401783}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.0829643367413115, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015835310219049267}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.13470266239538448, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002713889010739294}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.09508927308463527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001653234972150023}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.08664042360478007, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016238957338166923}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.14392387149210184, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003047831244734627}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.10033551040789061, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017901125661630923}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.6897184873606464, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09929118591535131}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ac3dd2805dddc7c24c6df2d615f4a3ecfaf4b196 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.03312872880982003, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023364937989206456}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.04331842400385379, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029785608067230333}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.032825069850753535, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002115896509151276}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.004031880720388147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007055581272082209}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.006940542757637712, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010097939996129512}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.0045358862257299385, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000680391156202852}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.02612465010671101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018898939348251488}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.033505823824258554, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002305244406205452}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.02557068204061304, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016585142146009432}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.02683188172054129, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019196788856535153}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.035158918419347346, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024488884244711472}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.026530666924173267, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017221175504394289}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.18440179296896278, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.034342854775642216}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json new file mode 100644 index 0000000000000000000000000000000000000000..01ee975639bc1118ab15a9de08f7207484204902 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_precision": 0.003430531732418525, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017130559457731933}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_recall": 0.0001996386258755556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 9.987616180882829e-05}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge1_fmeasure": 0.0003772463816858933, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00018869438087139786}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rouge2_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_precision": 0.003430531732418525, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017130559457731933}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_recall": 0.0001996386258755556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 9.987616180882829e-05}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeL_fmeasure": 0.0003772463816858933, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00018869438087139786}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_precision": 0.003430531732418525, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017130559457731933}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_recall": 0.0001996386258755556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 9.987616180882829e-05}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "rougeLsum_fmeasure": 0.0003772463816858933, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00018869438087139786}, {"task_name": "gem_xsum", "prompt_name": "DOC_boils_down_to_simple_idea_that", "bleu": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "2b9c75ca-2848-4a63-b3ce-b86ea2e2d7e8", "prompt_jinja": "{{document}}\nThis boils down to the simple idea that ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_0.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_0.json new file mode 100644 index 0000000000000000000000000000000000000000..40c00a6482cfdd95180c6bded1126be77be35ce4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.09636462071540756, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016678733192933997}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.23676664119512633, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00385324499120598}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.1353054364687284, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002260494132460735}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.01612079371257744, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007668663392734058}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.0407993478012527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018811374457448548}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.02280597280652988, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010663990484613007}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.0829190298668844, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012981572345846506}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.2049398850561697, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003055116119810891}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.11660002475056051, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017579801450236503}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.07393255459872417, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001314680148354152}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.18411144544228827, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0031904748886971623}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.10415585675049703, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018023677161240045}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 0.8587282218390164, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1092988209328534}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_1.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f4e0263aa82bd04acb756e51af7bf6cb74a5c4b1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.11994693258726821, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002272664862037966}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.2063208707620612, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003701405279713943}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.13999313273083266, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022002140826437623}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.01473220123107267, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008753263893803662}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.029347661070468774, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015701480680528376}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.018246524077524105, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009676889512530071}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.097949771945427, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017742187935061273}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.17075558002497426, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029345772747996486}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.11473299946041993, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016673962826521769}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.09414781020562817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018145520881081784}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.16167126991391795, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029104592060735126}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.10940608132454639, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016933691201315847}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 0.7674350493376833, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04722261703727283}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_2.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ebecf81fe3525e15bd45444dc47b4e2f147ff2d8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.1272859637960522, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027462425850244753}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.20726108396447318, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003480174389659312}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.1432459780290282, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023213704285313403}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.01752090200988848, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010115578307486322}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.032264766808514124, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017258708804944215}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.020808658216032574, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001109865173400727}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.10342334775733361, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002079516722716056}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.17360666431954774, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028928707472305787}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.11786898777293418, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017897922596018406}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.09983622506749402, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002115393492482011}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.16461169268640363, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002819701566946129}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.1127637907926372, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018017461138697946}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 0.9152942535263101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11054552948281171}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_3.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4a38e6df0303c0721c09d2fe8270a83603ccb356 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.1289330354185616, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030022141554972227}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.19771559769715022, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036123774127182606}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.14064423672321408, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0025293681234337334}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.018171280861945326, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009931234243246655}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.030785833829197685, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001613050728074892}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.02056912761205674, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010455547309245617}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.10432784481750776, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0022625245110963936}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.16542514116729834, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002987101666461712}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.11535113963082101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019250312942772209}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.1017214982197284, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023262186712280047}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.15836180866185806, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029184359176326665}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.11133583672050468, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019306189975949863}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 0.8295010385712954, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10729215881030962}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_4.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e6d9db81fb4e512da68559eba23ad73674c92c38 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.04257852396991355, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002914976483248318}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.05044280044044616, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030109427993094294}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.039198145576751886, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022897766752655114}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.005213726639706752, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007421737476799913}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.007806425202197695, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009097455571182794}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.005605133916804629, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000674983978729807}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.035672949988739856, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002547892797474333}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.042169944453916135, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002493645678486285}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.03233599361611141, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018540861576038941}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.03487509346325613, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025355122945714846}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.04044744995699889, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002420429517483526}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.03127361667218934, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001818299978680775}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 0.2878314198644634, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09522067958952156}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_5.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_5.json new file mode 100644 index 0000000000000000000000000000000000000000..20622e33ef033d2e97bbd28fa0befb207448b629 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_DOC_tldr_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_precision": 0.002936256429086033, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008325661288169507}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_recall": 0.0021595962684425826, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005981401663289608}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge1_fmeasure": 0.0023996080385096956, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006585056185108358}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_precision": 0.00041274393383161417, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0001973715747410765}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_recall": 0.0003399001109877913, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000164471497611397}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rouge2_fmeasure": 0.0003713888095226837, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00017882541746315305}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_precision": 0.0024675512225894843, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000714409976176186}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_recall": 0.0017999664297352442, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0004895093404742078}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeL_fmeasure": 0.0019987192664100155, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005427413769701772}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_precision": 0.002632861772195261, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.000753763054701241}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_recall": 0.001937003366161748, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.000531587586598376}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "rougeLsum_fmeasure": 0.0021468678403871737, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005854611584876265}, {"task_name": "gem_xsum", "prompt_name": "DOC_tldr", "bleu": 3.539919827428958e-43, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "db54e9b5-8ca9-4266-a773-695a3dc5bbf4", "prompt_jinja": "{{document}}\n\nTL;DR: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.1011888360679252e-37}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6f12d8c696818b04a092bbc257975680b9546361 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.10993607957935606, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017262014910658778}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2671733452707944, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00390137507010999}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1538448102106363, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023116204942085577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01835130554145112, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007993797195400837}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.04604209148970363, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001973243132477277}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.02588589928345875, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011070282134188164}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09053264538484676, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012769612144616453}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.22129938057010246, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00293788000817368}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12685128021061992, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017022653082941958}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08513932826931309, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013864967970779066}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.208617316083849, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003259825355111851}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11937933477851356, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018740476749061043}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9574482759158731, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08037361986575367}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a5e79195703fe6f99747d152c06c903886d91aeb --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.12072537979214573, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002167454500355592}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21838713615156446, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038259542032858217}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1452725178193069, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002296281448425658}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.014777621125809935, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008637330993774701}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03154673810471188, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001733277417697726}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01908194699059756, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010479813934091}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09551583464454579, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015465079617143938}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.17601089949518756, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029605535227822544}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11573111078031734, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001668882154190853}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09358138198842439, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016368426037437229}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.1713680364199079, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003137326232956779}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11300827952303219, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017954449161295192}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.8464028940550716, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12057452438636702}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ade7967b1b1d4764b6d6b7276c84d6fdbf1b7e18 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1277087568466237, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002600936480819687}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21240423763263364, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003539650124058849}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.14627685627701034, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023199421629749627}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016452983669055295, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009373447546795308}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.031084003624943502, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016581096554683838}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01990450612056915, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010553925331886055}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10316942850814137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001971701438481884}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.17539290041445985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002825968625575816}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11915492784443057, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017454981401993368}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10014675747700362, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020339193953665207}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16798366392500885, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002901019287841266}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11487832313530352, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018236145830277607}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7984106419011859, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1063368416287446}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6bdd9470469cb01f6c50a65308eba704441ed4fc --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13645821087841936, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030874846619215407}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.20065597941747373, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036985721066628736}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1467279970360384, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0026790925158139463}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.019488958963057586, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011775452187260563}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.030961861144012604, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017081744279510592}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.02153589403065349, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00118190969520155}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11007763632391766, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002410392914612521}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.1663257674332732, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030787485034055036}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11938767343543244, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002073060283013559}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10704427774578769, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0024643770024296028}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.15838825460379957, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0030224100131612764}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11495841768374035, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021071380507211635}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9423552122227806, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13782619268270152}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..8c24e899fa102b868023dda1f490e1818d557f85 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.04085291524121532, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002589850198863279}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.049526509450874855, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00300127990043923}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.039591317930521744, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022931685649421212}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.005319857832468002, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007156080942971114}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.007574332469978282, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009017550685095564}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005510795996651232, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006213462695311744}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03358813626286002, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002206945445578637}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04060546427390794, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024697862019406065}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.03214999836248129, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018423539820022828}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.03309676277901219, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002199818877733498}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.03950562122200865, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024124824570383946}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.03146184764090875, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018112412925546117}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.17189105743351635, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.038682318528850144}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..63a2d91c62d494e7396b3adc57252c9dd6b1de48 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0035305925957395084, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009785490984386396}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0030440980529733517, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0008638602364047491}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0031357253217098413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008645982383809647}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0007350839134967882, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00032922794052150407}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0006963162623539982, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0003353629065457281}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0007075672984960149, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003298891212277737}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0028741250344439784, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0007961665239105527}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0025236529245087863, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0007285140262655341}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0025693656488664164, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007105243349554819}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0030230196408857547, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008304280978614765}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0026492658288523934, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0007552587612647047}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0027055779382418573, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007426343024247237}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 3.736787794674515e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 8.045576123348552e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_0.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c1d27369af1189a88e31a65ebc118e9455c84d98 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.09630367276935313, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001534883053115542}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.23460591986325105, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035249062730580206}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.13477841342912497, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002063346031228702}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.015863901454312258, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000709892449539515}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.03990372817053524, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018147573204489273}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.02239979264930804, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000997880305154405}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.08476913174599572, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012339605708654188}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.20729064806304343, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028798441356743133}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.11875101756591155, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016571866332250852}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.07482920590283132, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012132438830642453}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.1845581778085984, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029597709556143095}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.10504435524250771, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016540156460037813}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 0.827022947248256, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07927582653684777}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_1.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2fa0e6e0cb11c729b68d285e2c2c1af6ecd9269b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.09967812302039447, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017535656187891884}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.23532487213764122, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037931530072197895}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.13585887003860755, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002175476639683441}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.015458406614911558, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007847122204995969}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.03955801765769148, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002028121799206321}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.02178998612851725, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001087232464799662}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.08499045094039762, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001391372178337716}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.20179056951969515, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003044247136206819}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.1159863670452197, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016981881580244976}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.07807531947048092, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014381437479792241}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.1855216996322975, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003180975541626223}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.1064016446915024, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017712110403532188}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 0.9250777542928637, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0995464024191013}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_2.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4b41acde116c2094351cf0c5da43b0d342a99eb3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.09893709052188565, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017414703745454985}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.23440267735863968, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003498471803075341}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.1349531450843222, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020232857713247897}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.015158067141100342, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008079828861487166}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.037206559662769104, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017788028367721072}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.020699762711342613, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009604579863830597}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.08507459833948786, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013961283150575271}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.20301299123076172, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028262793046608258}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.11632088160061539, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001583707769523938}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.07692952970836399, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001416058779535236}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.1833856652655966, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028828605595491133}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.10493620226111898, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016118116058243702}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 0.7540448660432285, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09539115530526798}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_3.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7636f7ad4cf613c4b155f3a1a74a39b47c6866d0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.09822153128767802, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021428329923455346}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.2173704671442931, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003785275093896278}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.1288656936450777, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002285142640984343}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.015471263083193475, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007775392244828585}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.03648134817348442, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018073965169390764}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.02079595560428378, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010061139041632509}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.08524370672290423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017456965733000762}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.19064982924857338, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031548397347180086}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.11230658338330175, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001849039452146557}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.07709178118688563, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017340953216017302}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.1714803560163439, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0030653620428528205}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.10101095703467147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001798939254091859}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 0.8519606095372889, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10081770314780626}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_4.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_4.json new file mode 100644 index 0000000000000000000000000000000000000000..14372586f5e4f4a9c6dae10e3d53bfc4704c72d2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.03689476483753298, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024463384773361955}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.05735145180342211, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003370593307800384}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.0392783960707505, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022527501571849374}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.005453105497796419, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007237955340670446}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.009826935945674431, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001076383576002216}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.006225565560992794, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006675181095595146}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.030609500618293827, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00201564752141265}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.04862705218381443, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0027960780471146885}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.03280903331157746, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018180599330376993}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.028977502639200816, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001997201119048309}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.04481380017813916, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026573416129651475}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.030578597001076086, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017593294917667497}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 0.311490095229844, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08056516332123577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_5.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5e914ab9753b806ae44ca7f0b34a7385be59f917 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_DOC_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_precision": 0.0027508790221273424, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007706396307316284}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_recall": 0.0022719827482217438, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.000646293142327662}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge1_fmeasure": 0.002421692802983106, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006764738834041464}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_precision": 0.0004918476824692141, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00020999706938142754}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_recall": 0.00044210136884942656, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00020463083076435227}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rouge2_fmeasure": 0.0004607621449381724, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00020508003752744115}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_precision": 0.0024136119947061004, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006725506737183709}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_recall": 0.0019698513077900763, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005402889527601966}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeL_fmeasure": 0.0021069927716341674, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005727525693181224}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_precision": 0.0024665686894058876, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007035053623232305}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_recall": 0.0020393354574629054, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005858747974607231}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "rougeLsum_fmeasure": 0.002169328410821175, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006128588314949923}, {"task_name": "gem_xsum", "prompt_name": "summarize_DOC", "bleu": 8.81729878858539e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "019726f2-7140-4ab6-a18d-a5f9cc709a47", "prompt_jinja": "Summarize: {{document}} Summary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.936570585229035e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d68139e74a5c18b8ac0a6df1b0846adb64130c2c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.10015017289953904, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016276274188076708}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.24643756064476016, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003818429494069263}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.14062273915493637, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022000708799639716}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.016701004797808148, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007833139485381877}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.04255250278955721, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0020010362587043313}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.02364976404549372, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010941164002693394}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.08541568567160791, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012793972003565766}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.21080805741967854, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030118865157834174}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.12001163311402531, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017191781997495605}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.07735151668808841, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013159109974304754}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.1920023167499489, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0032002989755440677}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.10884604959000686, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017934244319739035}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 0.938533817434639, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06661804850792503}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7c5697730a9eaadb71168a96f6d189e1ad9c4330 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.12052574025109639, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022210016746994497}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.24517047230002495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004088354034068248}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.15267367243026655, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024154109938806533}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.01753912194656959, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009726986505547985}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.039922933252291584, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019893681203709793}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.023239358291017316, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001176157253837938}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.09629033131871302, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016769870522745736}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.19827068105870446, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031512156971516287}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.12230123546095711, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017734340030687993}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.09385616207958877, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001782715945523954}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.19236967599868202, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003399193457550934}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.11886986067012077, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019377362527584576}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 1.0178456834586884, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09075195516691038}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..40c8bd26fbccb763e7b7e2feeb6035e08dfdc877 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.1290901025083865, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0025827449855700116}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.2345962358974404, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037295243525013715}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.15283133102375288, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002341223307257669}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.019485063384407045, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011129553201441335}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.03790567635691087, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019138824398120548}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.02358829661609317, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011830288079803606}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.10570526084215368, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020534248709442697}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.19497376652941362, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003002241641214748}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.12562572604910113, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017798211682304972}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.10165398752195147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021383205417301585}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.18432216882062125, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003052415691915719}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.11974611561902486, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018765281940315045}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 1.0056063433565963, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12502919332145682}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..0b17f2380c2322cce8341bd7b88acf8c929e2ebf --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.13244835829967186, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030908517791536955}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.212588522071666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003862810884401867}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.14887450442636246, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00274448239030748}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.021448284322085266, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001389010296371262}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.034841891806055784, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001852154768566367}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.02410030299641933, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013458556293409396}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.10749771032588204, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0024493472667949895}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.17604861924046014, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031652710323202445}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.1217506880628755, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002161955887082069}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.104633566179651, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002509395651495409}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.1689633360044492, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003201254184241733}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.11764148962890421, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022216493805243694}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 1.033576975690131, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10612867731517484}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..13b150584074e6cf6df2048e8f04430dabfd72d4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.03956432039685865, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002695013853025962}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.0492235193132569, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003192852760395571}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.03807978716467659, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023526154852758037}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.006014101598531974, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007964027992614052}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.008256986942409495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009340954377351208}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.006031590914671055, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006648715199487534}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.03171603699831025, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021853394627631482}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.04009236083483906, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026034873427392777}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.030352247654815704, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018302961576454654}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.031509274099563875, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002203992098896614}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.03906347934555628, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025972716725576332}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.029887550353590413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018411395918680518}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 0.17410955528806296, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.035584087070912734}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c2123db9c02fa2675d4d561bf446089dde6986ea --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_precision": 0.003716409376786735, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001337034178653581}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_recall": 0.0005091066357522025, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0001976825729298653}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge1_fmeasure": 0.0008897900877168533, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00034150756554492286}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_precision": 0.0008576329331046312, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008576329331046335}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_recall": 0.0001072041166380789, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00010720411663807919}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rouge2_fmeasure": 0.0001905850962454736, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00019058509624547674}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_precision": 0.003716409376786735, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001337034178653581}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_recall": 0.0005091066357522025, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0001976825729298653}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeL_fmeasure": 0.0008897900877168533, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00034150756554492286}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_precision": 0.003716409376786735, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001337034178653581}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_recall": 0.0005091066357522025, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0001976825729298653}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "rougeLsum_fmeasure": 0.0008897900877168533, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00034150756554492286}, {"task_name": "gem_xsum", "prompt_name": "summarize_this_DOC_summary", "bleu": 0.0, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "7d3584c5-8864-4d11-bce9-65499cdef4cb", "prompt_jinja": "Summarize this document: {{document}}\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_0.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_0.json new file mode 100644 index 0000000000000000000000000000000000000000..57b720cb04a3513873a87bf31c0cf0551da833f2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 3.8125985335008306, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.19585723695627313}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.06195211374786627, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.002094672822781696}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.5162593447897136, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.007535474730992951}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.10026196967185438, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.002577507472274814}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.04159189415204392, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0018730228506480733}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.34313550824678557, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.008028791094629359}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.06739212009258444, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0024788238669206223}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.06050388889344785, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0020837386360429886}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.5050572660910998, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.0075928927820709475}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.09782216479158326, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.002562059753885468}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.058339101160589345, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.0020763508063761675}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.49022104146755285, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.007674824806892895}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.09420308480263866, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0025478112691233964}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_1.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b40f558c48975a80a3fcd896a7496b94c58825d9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 4.984115818676601, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.2014952793991324}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.19442793157823965, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.00631830683765845}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.370171505230012, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.008099242120212022}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.18342088795857475, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.005818959285067925}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.11018136222612517, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.005443725071980991}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.2408430767207643, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.007813135912300767}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.11307708650003363, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.005231782535257262}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.1830299827679806, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.006160676873355797}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.36225372579005893, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.008159488239081773}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.17550095028177032, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.00577802693447086}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.18472309686765498, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.006223758103180095}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.3537491479629035, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.00808717995340113}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.17499439612022724, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.005797120916813952}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_2.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_2.json new file mode 100644 index 0000000000000000000000000000000000000000..80589f6556a5d3d376ed88ae884f55d109307d60 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 8.796771634022612, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.3911316659754168}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.3383306969374818, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.00792879108703781}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.4846166240136867, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.00804262853108372}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.3228173852707755, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.007403843294444907}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.2300011447301482, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.007377109399709124}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.3444986994568273, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.00824995631557637}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.22646917848202447, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.00698616733256225}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.3246607866009956, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.007877219184249775}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.47418408456836464, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.008158139222284562}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.31247494161086015, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0074228242548635155}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.32707982563239135, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.007911882392548962}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.46874851911407, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.008123276443221847}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.312822879065026, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.007434259241066784}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_3.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_3.json new file mode 100644 index 0000000000000000000000000000000000000000..921eb02a39af2d206ec83ed10b3de91c3ce680f4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 10.92470649416081, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.34911211757447225}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.4066260252913565, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.008231212467132855}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.5342107425741864, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.007869279508511906}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.38818722809693845, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.00771433388675621}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.2880242729653989, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.007821913844444021}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.3925434030174727, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.008258713922522358}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.2818434589400104, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.007413903409954248}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.39159852574062887, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.008194163001101947}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.5241346096429227, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.007997099796142482}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.37744167318989674, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.007752920486516649}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.39413857987963213, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.008220017403685996}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.5206460681141725, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.007963384607750488}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.3780280451942055, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.00775661536455512}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_4.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7eb42d31b6155340a286c6f3f9e78866e9fb0849 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 12.858154272179098, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.7278916577243625}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.4624335317026793, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.00826557767550841}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.563714294146611, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.007612246963891272}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.4351861616924822, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.007752659407235736}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.33238154342689963, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.008074274590958037}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.4191511130266625, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.008174601438156475}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.32028893653738877, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.007613634129323394}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.44619870598373307, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.008266117528340293}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.5526110549048481, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.0077829782817278575}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.423598898334552, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.00783537952283608}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.4499232867985444, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.008282275077255348}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.5499968165738798, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.007744452222402591}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.4248676289113619, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.007827628127897685}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_5.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_5.json new file mode 100644 index 0000000000000000000000000000000000000000..bc8f45b5a31f2688daf00dab67bbc7cfa90949ea --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_Correct-the-solution_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "Correct the solution", "bleu": 14.060734984477335, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.3178125511335596}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_precision": 0.48801414741034715, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.008239688228502035}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_recall": 0.5852280389043574, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.007442570769433745}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge1_fmeasure": 0.46127098559948143, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.007762781204476088}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_precision": 0.35639429216000035, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0080903487818709}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_recall": 0.4396664484838038, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.008127010865714702}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rouge2_fmeasure": 0.3436917448426498, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.007674129864826368}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_precision": 0.4725439297415101, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.008240735797918716}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_recall": 0.5746878552043578, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.007604121138500754}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeL_fmeasure": 0.45010094870717293, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.007834754180971275}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_precision": 0.47616423957106324, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.008273908357739463}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_recall": 0.5724908344656849, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.007578130144250922}, {"task_name": "piqa", "prompt_name": "Correct the solution", "rougeLsum_fmeasure": 0.4513631978392974, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "5f4b4645-9438-4375-9062-083130e6d04e", "prompt_jinja": "Given a goal and a wrong solution, rewrite it to give a correct solution.\nGoal: {{goal}} \nSolution: {{[sol1, sol2][1 - label]}}\nCorrected solution:\n|||\n{{[sol1, sol2][label]}}\n", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.007835514039656419}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a7f34837e591f0d45c36086c9ea4187c90db672d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.49891186071817195, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011665796539540878}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.49891186071817195, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011665796539540878}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json new file mode 100644 index 0000000000000000000000000000000000000000..17a49ab0fe02f13034adbd15fa5a57042fe5fc4b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.5021762785636561, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011665713661738877}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.5021762785636561, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011665713661738877}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json new file mode 100644 index 0000000000000000000000000000000000000000..cb2bb637403a273fd4f025735bb471e0da36dc7b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.5119695321001088, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011662480968070068}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.5119695321001088, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011662480968070068}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6f94a597fe3248dabb078e8785d726a47882e80d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.5032644178454843, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01166557553076037}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.5032644178454843, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01166557553076037}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json new file mode 100644 index 0000000000000000000000000000000000000000..8d3c2c22ce632026c6a1aee79425e1517801d049 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.5038084874863983, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0116654857447468}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.5038084874863983, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.0116654857447468}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a113111efe813374274f33a10e17c2cf8db76982 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc": 0.5016322089227421, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011665762007194868}, {"task_name": "piqa", "prompt_name": "choose the most appropriate solution", "acc_norm": 0.5016322089227421, "fixed_answer_choice_list": ["Solution 1", "Solution 2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "adfef248-f856-44fa-affd-e3223718854e", "prompt_jinja": "Given a goal and 2 solutions, choose the most appropriate solution.\nGoal: {{goal}}\n- {{\"Solution 1\"}}: {{sol1}}\n- {{\"Solution 2\"}}: {{sol2}}\n\nAnswer by returning either {{\"Solution 1\"}} or {{\"Solution 2\"}}\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011665762007194868}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_0.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_0.json new file mode 100644 index 0000000000000000000000000000000000000000..cabacedf2700fe17e5c4dc754fff50d417cd11d6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.08868063793537338, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.010679205176766326}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.024079092011199548, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.0012751225464276315}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.16382504234512227, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.0033146808351779324}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.03326642877020731, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.0009655071900921132}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.0026147181115056114, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0002787850657177482}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.01942570519259786, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.0011345558550410505}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.0038833183768095616, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0002654112249994602}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.02173563771564973, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0011135246337014074}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.1525533994924883, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.003123796083077344}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.030279861281445327, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0008455385563357755}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.019933519868235043, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.0011006754681286612}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.1373792671164724, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.002829849172122982}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.02719812824801991, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0007883831967628123}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_1.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6e575a31e70c6a934d83b5548df7716001b162d6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.13588134778013236, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.015459519670994496}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.05300894951369402, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.0027301529015735723}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.1061507145383213, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.0032149427593071117}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.04901346680697841, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.002055865923271878}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.008494513980757509, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0008377205870598726}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.015925496135927367, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.0011332549301939603}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.007982487413659915, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.000712887056916324}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.0457742343119478, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.002346181762140086}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.09699249115638213, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.003006081235657284}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.04302281186011969, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0018156151734925613}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.04604314004112248, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.002446501769385043}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.090627933605973, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.002845023971018631}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.042082706048624005, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018277105669673647}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_2.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f2c48cf96b9ba5f59384ade3cdb1590105e82f17 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.2131390370421847, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.034066817734553397}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.04766490997973889, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.0027505872541556155}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.056720871691876285, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.002671280465988878}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.03799621801376054, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.0019833229325767424}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.007753824889172869, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0009180726582653088}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.009676078489313466, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.0009719598996118394}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.0063412101839208194, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0006568616041856655}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.04049213061658682, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0023256155896140274}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.05059606948650782, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.0024394702454846825}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.03271614089031179, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0017135896610323722}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.041656568079500036, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.0024499403689278464}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.04892348784841611, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.002356085346485099}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.03282280188970894, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017310021914839983}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_3.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3abdad22c3555da729f3605d515aad2ed28b068a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.22997720223598342, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.02338425872757522}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.05031505431843015, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.0029584770445463347}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.04384933797682676, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.002438097616145567}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.03710567348980287, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.0020180005367721313}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.008144343048337636, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0009881296494107798}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.0074638698626018, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.0009315826019915539}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.006082833337651904, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0006978818850891022}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.043579152184238995, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0025694327719803156}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.03882153127320646, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.002203760453572274}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.032270439068929166, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0017622474773529432}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.04474778358521563, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.002654591770374059}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.03883827376493022, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.0022045086521405998}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.0327651263489432, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.001788852779448503}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_4.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a6d62e3bbf1d8b443b3e9e2d61ceec4cc364efd6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.2712297654901753, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.053042750349294135}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.0542226515580773, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.0030939110930176925}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.046451743768737, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.0025439882698671474}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.04059849587408926, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.002182568730131035}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.008974181628774633, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0009729364560951758}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.00866884991169637, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.0010298253089854248}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.0070696546789010785, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0007767095517004894}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.04715725401216418, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0027156375130040103}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.040955294760240304, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.002277246735797152}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.03526961790871762, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0019038989493982874}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.048027622411834725, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.002785556905287026}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.040990146321799474, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.002275559958385726}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.03562639655601933, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019216190005916407}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_5.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3d766a42c76f3423c73cd296d740d12238412fde --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_no-prompt-needed_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "no prompt needed", "bleu": 0.1498156695877611, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "bleu_stderr": 0.028223046286397702}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_precision": 0.05946021488215291, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_precision_stderr": 0.00327624600239411}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_recall": 0.04615385829211834, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_recall_stderr": 0.0024475462608166006}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge1_fmeasure": 0.042735679103126284, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge1_fmeasure_stderr": 0.002181587233057925}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_precision": 0.0075902239127463664, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_precision_stderr": 0.0008916116262057205}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_recall": 0.007204549919294013, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_recall_stderr": 0.000911423031900326}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rouge2_fmeasure": 0.006029826803091018, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rouge2_fmeasure_stderr": 0.0007150551789811505}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_precision": 0.05132083543992989, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_precision_stderr": 0.0028482950530335353}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_recall": 0.04072766325284397, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_recall_stderr": 0.002204434423881122}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeL_fmeasure": 0.03698020550413456, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeL_fmeasure_stderr": 0.0018951966460887433}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_precision": 0.05260647328921363, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_precision_stderr": 0.00295071182859548}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_recall": 0.04093574793020607, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_recall_stderr": 0.0022066554152275607}, {"task_name": "piqa", "prompt_name": "no prompt needed", "rougeLsum_fmeasure": 0.03752446063060584, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "c8c45ef1-2ffc-43d7-8710-b98c2fc4f699", "prompt_jinja": "{{goal}}\n|||\n{{[sol1[0].lower() + sol1[1:], sol2[0].lower() + sol2[1:]][label]}}", "prompt_original_task": false, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019205547729290852}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_0.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6bf5c7966b25b4b66841e38b66f1196c26bcde85 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.49510337323177367, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01166526473007815}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.49510337323177367, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01166526473007815}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_1.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e3e0f8cc090253a84f1f3e1a6dfeb6a0d132748d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.5048966267682263, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01166526473007814}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.5048966267682263, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01166526473007814}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_2.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_2.json new file mode 100644 index 0000000000000000000000000000000000000000..dec70cf12885d3918ad7faeb38bbb180aba2636c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.5027203482045702, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011665651503000718}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.5027203482045702, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011665651503000718}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_3.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_3.json new file mode 100644 index 0000000000000000000000000000000000000000..a81c930713874bda402b5db9dfcb8c9352c48df8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.5108813928182807, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011663061261117758}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.5108813928182807, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011663061261117758}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_4.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_4.json new file mode 100644 index 0000000000000000000000000000000000000000..9abd20e8513f7d36bbe30c6b3abc17a61772518e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.5087051142546246, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011664055982032842}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.5087051142546246, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011664055982032842}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_5.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7bd20777611f5ea4812a15ea9461da03dcce36f7 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_pick_correct_choice_index_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc": 0.5032644178454843, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011665575530760367}, {"task_name": "piqa", "prompt_name": "pick_correct_choice_index", "acc_norm": 0.5032644178454843, "fixed_answer_choice_list": ["1", "2"], "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "44778818-7b73-4262-a294-c00fc32b6c2c", "prompt_jinja": "Sentence: {{goal}}\n\nChoice {{answer_choices[0]}}: {{sol1}}\n\nChoice {{answer_choices[1]}}: {{sol2}}\n\nWhat is the index of the correct choice for ending for the sentence?\n\nAnswer:\n\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011665575530760367}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0ad08124c177b3bfd99d8591671505024144e0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.5136017410228509, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01166150683982377}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.5179542981501633, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011658300623287153}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..10e90f4e26b4d6a8664256080cbc4e512eff102b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.5206746463547334, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011655846995729705}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.5255712731229597, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011650557844573577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8ddf413b70a9c02a3417e21ded7da088c24904bf --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.5201305767138193, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011656365410780373}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.5206746463547334, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011655846995729705}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7125a824df2aff3e86db8b91f658f2ff0021d6d0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.5223068552774756, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011654208652596471}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.5266594124047879, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01164922999434739}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4de875c5c8bdd02630f7938017f72199906d8452 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.529923830250272, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011644913435420155}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.529379760609358, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011645667565050864}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..164f1dfca217e10235658117ade0d7f74efcdcc8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc": 0.5272034820457019, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011648545262429024}, {"task_name": "piqa", "prompt_name": "what_is_the_correct_ending", "acc_norm": 0.5250272034820457, "fixed_answer_choice_list": null, "dataset_path": "piqa", "dataset_name": null, "subset": null, "prompt_id": "16e97a16-c958-4956-bfba-279f88dafd5b", "prompt_jinja": "Goal: {{goal}}\n\nWhich is the correct ending?\n- {{sol1}}\n- {{sol2}}\n\nAnswer:\n|||\n{{answer_choices[label]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011651200967623707}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json new file mode 100644 index 0000000000000000000000000000000000000000..60317fb444ab6aeae78c58da897f255181749588 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.397, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015480007449307996}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.377, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01533317012577986}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a52cd666dea67421ce20783ecce11710ed7f7987 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.415, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015589035185604628}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.398, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015486634102858922}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json new file mode 100644 index 0000000000000000000000000000000000000000..20d246f1ceb7ebf542f5f2dfd456a4d7bc409f3b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.405, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015531136990453049}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.397, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015480007449307994}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4e494f203d6fdae1714a16a372afbeb8923f61d8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.396, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015473313265859406}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.392, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015445859463771297}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json new file mode 100644 index 0000000000000000000000000000000000000000..aef76b2b2ef6aea95ff10ee71864b41ea1e1ae70 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.383, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015380102325652721}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.392, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015445859463771295}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d7ac1522664376bc045664b78ff44e6b3d8329a2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc": 0.388, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01541731797991108}, {"task_name": "sciq", "prompt_name": "Direct Question (Closed Book)", "acc_norm": 0.379, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "0af52ad2-2b12-4700-b664-cd26d2da6dc1", "prompt_jinja": "Q: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01534909100222535}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_0.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_0.json new file mode 100644 index 0000000000000000000000000000000000000000..898538581c3880d1efe5501f8727155350aa116a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.703, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014456832294801103}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.6, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015499685165842596}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_1.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_1.json new file mode 100644 index 0000000000000000000000000000000000000000..43c13e0d949e9f97ecf7e3d8b0914efab167bd1d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.723, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014158794845306265}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.666, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014922019523732965}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_2.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_2.json new file mode 100644 index 0000000000000000000000000000000000000000..dcc5bf8318b865b352568b944be1643066ff226c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.721, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014190150117612037}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.686, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014683991951087974}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_3.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_3.json new file mode 100644 index 0000000000000000000000000000000000000000..887df171049746cc2d0d9e998c012f092c259fa9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.719, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014221154708434944}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.692, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014606483127342758}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_4.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_4.json new file mode 100644 index 0000000000000000000000000000000000000000..948c035c617fe0d99afd1855d6ca2c5514b6067b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.723, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014158794845306265}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.701, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014484778521220465}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_5.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_5.json new file mode 100644 index 0000000000000000000000000000000000000000..921a3425ff3a919532c2e2802fd98b8d1d6b913c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Direct-Question_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Direct Question", "acc": 0.718, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014236526215291354}, {"task_name": "sciq", "prompt_name": "Direct Question", "acc_norm": 0.703, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "d417fcfb-9f00-4186-95d8-e63609495164", "prompt_jinja": "Answer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014456832294801103}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json new file mode 100644 index 0000000000000000000000000000000000000000..819769f99a3da4067ff1438e6fa1e6b7cbc22375 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.373, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.015300493622922814}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.34, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014987482264363937}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1783e2da4dda6c7f03794e15703ff7caa4fef13f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.376, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.015325105508898129}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.391, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.015438826294681783}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8045f73503ea86bbda2d6e92aa3fee3582fa6440 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.36, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.015186527932040117}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.351, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.015100563798316403}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json new file mode 100644 index 0000000000000000000000000000000000000000..cc8b8cd988efecf710c62eac015ee9158734ce0f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.325, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014818724459095524}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.345, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.015039986742055237}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d646f18de676056c223111d9fc8df06a7573b598 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.341, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014998131348402707}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.343, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.015019206922356951}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json new file mode 100644 index 0000000000000000000000000000000000000000..32e9a36f6e723cb216825a16ddd15567adb51bb1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc": 0.332, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014899597242811473}, {"task_name": "sciq", "prompt_name": "Multiple Choice (Closed Book)", "acc_norm": 0.328, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "15b0a989-84e4-4f1c-8ac1-12dbfa2ff42a", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014853842487270334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json new file mode 100644 index 0000000000000000000000000000000000000000..725a572a3c95dc90f6af547abbb22a8e97c0f8c3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.508, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015817274929209004}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.408, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015549205052920673}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2f1cae5630d0fc25080eda191cdf1aab31b253b5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.411, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015566673418599278}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.423, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01563058909047635}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7824c172ab1494889b4cece400311f4e7ad75eed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.366, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015240612726405754}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.379, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01534909100222535}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json new file mode 100644 index 0000000000000000000000000000000000000000..944622338454b404231c0fc97defd77636bbfc61 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.368, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0152580735615218}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.376, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01532510550889813}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f91e29e16fef0de9d02df8e98dd029f15129942c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.37, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015275252316519362}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.371, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015283736211823187}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c6edd29db6db3b04e27e73e8c314a933c3a89ce6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc": 0.361, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015195720118175115}, {"task_name": "sciq", "prompt_name": "Multiple Choice Question First", "acc_norm": 0.382, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "368e29fb-506d-4a4e-ac33-0af8d6e1729b", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nQ: {{question}}\n\n\nRead this paragraph and choose the correct option from the provided answers:\n\n{{support}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\n\nA:|||{{answer_choices[3]}}\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015372453034968522}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_0.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3e54b9dc370af312c8c6f6aecbc1368526734eb8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.486, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015813097547730987}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.414, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015583544104177519}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_1.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..773d7c2f7317ca67c5a5e7eb9876bd65a7f302fe --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.427, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015649789644462224}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.436, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015689173023144067}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_2.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..1912d5182426ccbd1a0a61e7cd472aa9b78440d1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.401, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015506109745498325}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.389, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015424555647308493}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_3.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..219adedbb00abe5bced1c0dea40d32a8315b35fd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.385, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015395194445410808}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.391, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015438826294681783}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_4.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e071e41c5fd5e3a0459e8e436bf7a8c443557f3f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.376, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015325105508898125}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.378, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.015341165254026644}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_5.json b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ee812cc97f999bda0188570736537889b12272c9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_sciq_Multiple-Choice_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "sciq", "prompt_name": "Multiple Choice", "acc": 0.355, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_stderr": 0.015139491543780532}, {"task_name": "sciq", "prompt_name": "Multiple Choice", "acc_norm": 0.374, "fixed_answer_choice_list": null, "dataset_path": "sciq", "dataset_name": null, "subset": null, "prompt_id": "63c22e8a-7029-4ce3-bd26-6ca6a1541563", "prompt_jinja": "{% set order = [[0, 1, 2, 3], [0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1], [0, 3, 1, 2], [0, 3, 2, 1],\n [1, 0, 2, 3], [1, 0, 3, 2], [1, 2, 0, 3], [1, 2, 3, 0], [1, 3, 0, 2], [1, 3, 2, 0],\n [2, 1, 0, 3], [2, 1, 0, 2], [2, 0, 1, 3], [2, 0, 3, 1], [2, 3, 1, 0], [2, 3, 0, 1],\n [3, 1, 2, 0], [3, 1, 0, 2], [3, 2, 1, 0], [3, 2, 0, 1], [3, 0, 1, 2], [3, 0, 2, 1]] | choice %}\nAnswer the following question given this paragraph: \n\n{{support}}\n\n\nQ: {{question}}\n\n Choices:\n\n- {{ answer_choices[order[0]] }}\n\n- {{ answer_choices[order[1]] }}\n\n- {{ answer_choices[order[2]] }}\n\n- {{ answer_choices[order[3]] }}\n\nA:|||{{answer_choices[3]}}\n\n", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01530876736900637}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..b15a65c948c77c6b3237a5528429ee99a2655dde --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.5018706574024586, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011562351329083268}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.5093532870122929, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011560409019420367}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b4344830dfee372f799a7aaf5c8fe612b29e313c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.4922501336183859, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011561043278863545}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.5109567076429716, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011559655791130729}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ff88c5ecc715cd44f5aacd7b038c106d88c38d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.4794227685729556, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011552636515221858}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.49545697487974344, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011561954965856519}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..70d1e5a63d7f139af67ca6e642b4189f2411632d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.4836985569214324, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011556285484521561}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.5056119722073757, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011561703928784337}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..64f23fdd7bbdef9d9d8516edd5993b1f4ac50b84 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.48476750400855156, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011557065368348291}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.49812934259754144, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562351329083266}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0be0420250915e4231cc72a10af7f68c9930abe3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc": 0.47888829502939606, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011552120807053815}, {"task_name": "story_cloze_2016", "prompt_name": "Answer Given options", "acc_norm": 0.4938535542490647, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a4946f9-a0e2-4fbb-aee8-b26ead2cf6b8", "prompt_jinja": "{{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What is a possible continuation for the story given the following options ? - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011561558589040751}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8c6cf1ecc3af1733a829daf03aed871a3fdeceb6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.5093532870122929, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011560409019420364}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.5200427578834848, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01155313897796101}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b895c042626044a05a23f91ef3258c5f4772b34d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.49706039551042225, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011562232421541946}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.5093532870122929, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011560409019420369}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..bdaf0c8da47cf4982ee2c989acbf215cd21d98ba --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.4863709246392304, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011558135970599896}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.4975948690539818, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562298481438055}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1a4301c90f531d84238f6fc93d3378a641bdf3d4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.4922501336183859, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011561043278863545}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.4997327632282202, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562430600098489}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4e93ea8968e7bb1a92db2472b4dd645471aa0b5b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.4778193479422769, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011551049647290307}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.49706039551042225, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562232421541946}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0253cb55785284030a6a73c7523f53af1eb8447f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc": 0.4735435595938001, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011546234813777409}, {"task_name": "story_cloze_2016", "prompt_name": "Choose Story Ending", "acc_norm": 0.49331908070550506, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "1a9d53bc-eb77-4e7c-af6e-3d15b79d6cf1", "prompt_jinja": "Read the following story :\n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose a possible ending for the previous story from the following options: \n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011561400034509398}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..44ade405ae0710a20f2831f8fbde3dd8fc70ab23 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..780f60ceb1b608486241f16d0f84fbf18773207d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8149a45f7d9c8a5c28fb04cdefba89f06a24fb5b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4f03b85f5d739d17fd4a1f2ea91f6680e39f71c8 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e8f6975abb17a03a018f225a986aef2940822133 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..1e2dd147e695544a9a12637afc4a14ae3d9bcd69 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json @@ -0,0 +1 @@ +{"results": [], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1233f09f2c619fb7123cb2bcc8d59ada48b8f790 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.504008551576697, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011562060664045736}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.5114911811865313, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011559378273599123}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..14164fdb33d4fcc5b87d4a476c9e6c3e8f9b6754 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.4906467129877071, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011560409019420369}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.4991982896846606, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562417388300193}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9b46422cee7872d3702c595e6f0b42401071d0d4 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.4719401389631213, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011544210396951663}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.4949225013361839, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01156183605423878}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..31a31a6c7bcbeb6f07d618c35dcdc695cd2d466f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.4820951362907536, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011555016408505476}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.49599144842330306, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562060664045727}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ad027e46a37320cd05110881ab5f46b83db162cf --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.4820951362907536, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011555016408505476}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.48690539818278994, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011558466383367178}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cd08d8df51bd284443a7ee856223dc63a8afd547 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc": 0.4730090860502405, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011545573278697235}, {"task_name": "story_cloze_2016", "prompt_name": "Novel Correct Ending", "acc_norm": 0.4879743452699091, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "c6f3d802-4f97-449f-a911-03470d418f7d", "prompt_jinja": "I read the following novel: {{input_sentence_1}} {{input_sentence_2}} {{input_sentence_3}} {{input_sentence_4}} What do you think is the most probable ending? You can choose from the following options: - {{answer_choices | join(\"\\n- \")}} ||| {{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011559087533800692}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a34f3f13dfd28c7469f6bd88bf38d4ea3d1c718b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.5109567076429716, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011559655791130734}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.5232495991448424, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01154992548392746}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..10456c82cebee2451fed9648c919bf972efce6ae --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.4917156600748263, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01156084507652571}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.5098877605558525, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011560171163157391}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b9fbf76d888ac380f307f725f2368d1ea5dfa355 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.48743987172634956, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01155878357073797}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.4917156600748263, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01156084507652571}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9d9688116092b481a25b02a2d6297ef5fbaf6c49 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.481560662747194, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011554566910658105}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.49812934259754144, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011562351329083266}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..5956231eeed169c2758b1fec7678433d447e42b1 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.47888829502939606, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.011552120807053817}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.4911811865312667, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011560633656952963}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cd66c8cda95249087ce1dee21c9b6be89d5a50c6 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc": 0.47033671833244256, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01154206650976701}, {"task_name": "story_cloze_2016", "prompt_name": "Story Continuation and Options", "acc_norm": 0.48957776590058794, "fixed_answer_choice_list": null, "dataset_path": "story_cloze", "dataset_name": "2016", "subset": null, "prompt_id": "b5c8445f-2d3a-4691-bdd5-58956816702f", "prompt_jinja": "What is a possible continuation for the following story ? \n\n{{input_sentence_1}}\n{{input_sentence_2}}\n{{input_sentence_3}}\n{{input_sentence_4}}\n\nChoose from the following options:\n- {{answer_choices | join(\"\\n- \")}}\n|||\n\n{{answer_choices[answer_right_ending -1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.011559920087347773}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_0.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c1d6adb55a2833c0c8761e21fe3879b3e8e68960 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.5379061371841155, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030009848912529113}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.48375451263537905, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030080573208738064}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_1.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..15b3d2085af2bc5b550e007ded298bc9c8060c29 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.48375451263537905, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030080573208738064}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.49097472924187724, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030091559826331334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_2.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..13fd092b6af7d48f86a2d6c2b1a6df022946b6cb --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.5523465703971119, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.02993107036293953}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.555956678700361, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.029907396333795994}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_3.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b956f468f8c54901867b4c6e320b33bfdaff1d79 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.5054151624548736, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976633}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_4.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b893f318a6161d1660d87ae77719594eae427b89 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.5270758122743683, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030052303463143706}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.5018050541516246, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976633}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_5.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a812799cbd419085afba24423b3e6b850e805893 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_GPT-3-style_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc": 0.48375451263537905, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030080573208738064}, {"task_name": "superglue_rte", "prompt_name": "GPT-3 style", "acc_norm": 0.4729241877256318, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "8fb1c6aa-20e9-438c-bece-c6af1c746449", "prompt_jinja": "{{premise}}\nQuestion: {{hypothesis}} True or False? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..945830dc4a7483e371a38a50c7f54af3f1bfbe1a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.5270758122743683, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030052303463143706}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.5270758122743683, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..67130d5f00425c209a7260a844957d36ffedef8f --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.48375451263537905, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030080573208738064}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.48014440433212996, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030072723167317194}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..979291fc9d3c7df0c0bfac9a4b04ced254cfa137 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.5018050541516246, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030096267148976633}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.5018050541516246, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976633}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..fd36ae71e33b59e17d5bbc588da0f27493f9aa4b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030096267148976633}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.4729241877256318, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..888e0135a2ed3b0e6a34e25c3474285742d17886 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.51985559566787, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030072723167317184}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..fa96bce126976eab06571e714e416ef2c82c05ac --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "MNLI crowdsource", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2b52a83c-0021-41fe-b44c-5aaa076d71a2", "prompt_jinja": "{{premise}} Using only the above description and what you know about the world, is \"{{hypothesis}}\" definitely correct? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976626}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json new file mode 100644 index 0000000000000000000000000000000000000000..482a938b52dfa1b570d3cd2cb45ce43ab5a20390 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.516245487364621, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030080573208738064}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.5270758122743683, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d7263f6f41e8cfdfe59052cb39b3ec9a7e2ad283 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030096267148976633}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976626}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json new file mode 100644 index 0000000000000000000000000000000000000000..df30f22db7d8749eb4b388394c4ceae74a758876 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.5054151624548736, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976626}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f25d18ecafae6c2f81dae9db8a7d53b619f7e702 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.48736462093862815, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030086851767188564}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.5090252707581228, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030091559826331334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json new file mode 100644 index 0000000000000000000000000000000000000000..645d65926726f1fdc6aa65408565efb497bf4036 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.5090252707581228, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030091559826331334}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.51985559566787, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030072723167317184}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3a67c5ca7d5198f35a9807a150f184eca7071b73 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc": 0.48014440433212996, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0300727231673172}, {"task_name": "superglue_rte", "prompt_name": "does it follow that", "acc_norm": 0.48736462093862815, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "e2fb58f2-b1f2-4aef-b74b-c4ee1c571fff", "prompt_jinja": "Given that {{premise}} Does it follow that {{hypothesis}} Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030086851767188564}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_0.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c069b86bad3aec05bab47dcea6d28bbeb3163cfa --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.5342960288808665, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030025579819366422}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.5270758122743683, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_1.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ff69bacfe6a6e394dd5c656e22428a5f47d60e57 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.48014440433212996, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030072723167317194}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.48736462093862815, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030086851767188564}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_2.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_2.json new file mode 100644 index 0000000000000000000000000000000000000000..168324142bb9e377efd707c544dff62b7ceb69c0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030094698123239966}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_3.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_3.json new file mode 100644 index 0000000000000000000000000000000000000000..aa30275330665479e0267429de7919fe5b4c62f9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030096267148976626}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.48014440433212996, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.0300727231673172}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_4.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f58f6913a263cc27c3fe539a55a9c6d2c35535 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.47653429602888087, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.03006330041190266}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976633}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_5.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_5.json new file mode 100644 index 0000000000000000000000000000000000000000..729b2e105a016eeac4a58501cad2a20cc0cf90ef --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_guaranteed-true_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "guaranteed true", "acc_norm": 0.5054151624548736, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "2d0d63da-ffcf-4f6e-941a-b8da922be43e", "prompt_jinja": "Given {{premise}} Is it guaranteed true that \"{{hypothesis}}\"? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030094698123239966}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_0.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d48e3fe1deb68dcc9e38b46832251a17ccdb11f3 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.5306859205776173, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.03003973059219781}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.5270758122743683, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030052303463143706}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_1.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_1.json new file mode 100644 index 0000000000000000000000000000000000000000..df0b863cb7ee74b116ce346ea7d7aa81fdfc9f1c --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.4729241877256318, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030052303463143706}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.48014440433212996, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.0300727231673172}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_2.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_2.json new file mode 100644 index 0000000000000000000000000000000000000000..21a0dcedcbfd682aaaf92b2b29bd44c2f57ad0fd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.49458483754512633, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030094698123239966}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.4981949458483754, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030096267148976626}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_3.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d4471a373c564519ef165fe368a284e058e81616 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.48375451263537905, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030080573208738064}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.48736462093862815, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030086851767188564}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_4.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_4.json new file mode 100644 index 0000000000000000000000000000000000000000..feaa4d517a1cb3dbfaa5f68592fcd87e3cfbc803 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.4729241877256318, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030052303463143706}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.48375451263537905, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030080573208738064}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_5.json b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8e89e903a91b05604efe120f6a897b59b2cff00d --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_superglue_rte_should-assume_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "superglue_rte", "prompt_name": "should assume", "acc": 0.48736462093862815, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.030086851767188564}, {"task_name": "superglue_rte", "prompt_name": "should assume", "acc_norm": 0.5090252707581228, "fixed_answer_choice_list": ["Yes", "No"], "dataset_path": "super_glue", "dataset_name": "rte", "subset": null, "prompt_id": "b8dc85c6-28b6-4340-979a-8e77c2a0dde8", "prompt_jinja": "Given {{premise}} Should we assume that \"{{hypothesis}}\" is true? Yes or no? ||| {% if label != -1 %}{{ answer_choices[label] }}{% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.030091559826331334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_0.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_0.json new file mode 100644 index 0000000000000000000000000000000000000000..64dc11939c19787e2c36d9f0600c0ae1b8901151 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.505130228887135, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014051745961790513}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.494869771112865, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014051745961790523}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_1.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2c7f171905cd69f83de7a86a775662a4cfb1ccbb --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.5059194948697711, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01405150083848581}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.4972375690607735, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052271211616448}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_2.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8e9399f1008a270fc02c82aa1b65b9017ce4d2bd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.4980268350434096, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052376259225636}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.4964483030781373, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052131146915867}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_3.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8363def2cae9da153a3622d4466a98bda82ea0a9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.5090765588003157, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014050170094497707}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.48855564325177586, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014048804199859322}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_4.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_4.json new file mode 100644 index 0000000000000000000000000000000000000000..aef0d3b8f5bbf6597748c2e26e8aacaf5574462b --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.5011838989739542, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052446290529015}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.49013417521704816, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014049749833367592}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_5.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_5.json new file mode 100644 index 0000000000000000000000000000000000000000..06dbe74cd597c20d2914ef823f6ea88694ce9abf --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_Replace_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "Replace", "acc": 0.5114443567482242, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014048804199859325}, {"task_name": "winogrande", "prompt_name": "Replace", "acc_norm": 0.4925019731649566, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "e7e42e9e-bc57-46ed-ad8a-76a5b90a5bb9", "prompt_jinja": "{{sentence}}\nReplace the _ in the above sentence with the correct option: \n- {{option1}}\n- {{option2}}\n|||\n{% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01405090552122858}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_0.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_0.json new file mode 100644 index 0000000000000000000000000000000000000000..780dbc86cc9f0d37c6cbfcc1e4e7fbe969b4f0a9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.4964483030781373, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.01405213114691586}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.494869771112865, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014051745961790516}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_1.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bf7c8124040128b9e3ce738fc357a532e0b45aad --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.4925019731649566, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014050905521228577}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.49329123914759276, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014051220692330349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_2.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9b5cfa048bdb8dc95bdab8d5757ae65e298b9df0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.5027624309392266, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014052271211616441}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.4940805051302289, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014051500838485807}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_3.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_3.json new file mode 100644 index 0000000000000000000000000000000000000000..59c25f746b604f080495fac109398cfb21321450 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.516179952644041, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.0140451261309786}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.5011838989739542, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014052446290529019}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_4.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a14fef3e406f88ffd0003e9fa5a063340a7bb94e --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.500394632991318, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014052481306049516}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.4996053670086819, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014052481306049512}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_5.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b753f3f5965a4086e99f0549cd553b8461220053 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_True-or-False_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "True or False", "acc": 0.5011838989739542, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_stderr": 0.014052446290529019}, {"task_name": "winogrande", "prompt_name": "True or False", "acc_norm": 0.5011838989739542, "fixed_answer_choice_list": ["True", "False"], "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "d11378d1-2f24-4509-bbbc-bfa2921300d5", "prompt_jinja": "The _ in the sentence below refers to {{option1}}. True or False?\n{{sentence}}|||\n{{answer_choices[answer|int - 1]}}", "prompt_original_task": false, "comment": "", "acc_norm_stderr": 0.014052446290529012}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json new file mode 100644 index 0000000000000000000000000000000000000000..b64e0d57bd68fd415d3ef6b02d0247181d48dbf0 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.5067087608524072, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014051220692330349}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.49329123914759276, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014051220692330349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1642cf64509d853be7fdc6d660a32d5475fc1c76 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.5169692186266772, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014044390401612976}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.5011838989739542, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052446290529015}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json new file mode 100644 index 0000000000000000000000000000000000000000..66c2f028286ce1dd09019041197d42f9a4d99ca2 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.489344909234412, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.0140492945362904}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.4964483030781373, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052131146915853}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json new file mode 100644 index 0000000000000000000000000000000000000000..062e590f0a919284e4924dbb3d5f8116f739a853 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.500394632991318, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052481306049516}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.4877663772691397, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014048278820405616}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b510b337d04bbff4e9679f3825304a912c93e4a9 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.48855564325177586, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014048804199859332}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.49013417521704816, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014049749833367596}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cf4a1fb504916a2f94c00b19e6a03966f8b76b73 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc": 0.4964483030781373, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052131146915852}, {"task_name": "winogrande", "prompt_name": "does underscore refer to", "acc_norm": 0.500394632991318, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "0f23f058-5b4d-42a3-92d4-5d60688aa90c", "prompt_jinja": "{{ sentence }} In the previous sentence, does _ refer to {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052481306049516}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_0.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_0.json new file mode 100644 index 0000000000000000000000000000000000000000..55aefa23339f444de5ac7d7ad384ff1846c17679 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.5256511444356748, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014033980956108558}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.5138121546961326, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014047122916440419}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_1.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_1.json new file mode 100644 index 0000000000000000000000000000000000000000..40b39d0fd42d927363ec2a0f9dc3af964da9419a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.5146014206787688, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014046492383275832}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.500394632991318, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052481306049516}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_2.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ffef302c6b36e465147f03f25cada0e8705dcb3a --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.5082872928176796, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014050555322824189}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.5027624309392266, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052271211616441}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_3.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3550ad5c9a3944ff0e26cc397b454283a5768cea --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.49329123914759276, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014051220692330349}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.4925019731649566, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014050905521228577}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_4.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ac0e99710c028b7ea5021f4d1a8f738d7c97eddd --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.5035516969218626, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052131146915867}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.4988161010260458, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052446290529015}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_5.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_5.json new file mode 100644 index 0000000000000000000000000000000000000000..71bbc42b8d61780033f8bb0d2aad6466b91f4ee5 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_stand-for_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "stand for", "acc": 0.5114443567482242, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014048804199859329}, {"task_name": "winogrande", "prompt_name": "stand for", "acc_norm": 0.516179952644041, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5080f912-fac8-400f-983c-944baf9b10c0", "prompt_jinja": "In the sentence below, does the _ stand for {{answer_choices[0]}} or {{answer_choices[1]}}?\n{{sentence}}|||\n{{answer_choices[answer | int - 1]}}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014045126130978601}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_0.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a67827a2a993a73e899d4bc815500a288a3d7b68 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.5130228887134964, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.01404771839399767}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.4964483030781373, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052131146915857}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_1.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_1.json new file mode 100644 index 0000000000000000000000000000000000000000..47bbac22889140562232788616dfedb781af9516 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.5027624309392266, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052271211616441}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.5019731649565904, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052376259225636}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_2.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a03e198dbf91258b30f5dbc952236d4685254aed --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.5011838989739542, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052446290529019}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.505130228887135, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014051745961790513}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_3.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b0292d73e39500d68a016bbd994d10d8f9edc326 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.500394632991318, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014052481306049516}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.5011838989739542, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014052446290529015}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_4.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7a141b636748b103fee0eb935260ce1de092c033 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.49013417521704816, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014049749833367596}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.4925019731649566, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.014050905521228573}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_5.json b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cf3be36c03c9d90ba1518f971edaecf3c1fa3157 --- /dev/null +++ b/146m14b14b/eval/agg.lm1-146m-14b_winogrande_underscore-refer-to_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "winogrande", "prompt_name": "underscore refer to", "acc": 0.48697711128650356, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_stderr": 0.014047718393997663}, {"task_name": "winogrande", "prompt_name": "underscore refer to", "acc_norm": 0.4877663772691397, "fixed_answer_choice_list": null, "dataset_path": "winogrande", "dataset_name": "winogrande_xl", "subset": null, "prompt_id": "5af00ba1-86e0-421b-bb97-26bf58df52d3", "prompt_jinja": "{{sentence}}\nWhat does the _ in the above sentence refer to? {{ option1 }} or {{ option2 }}? ||| {% if answer == '1' %} {{option1}} {% else %} {{ option2 }} {% endif %}", "prompt_original_task": true, "comment": "", "acc_norm_stderr": 0.01404827882040562}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..61535048f2dc784837d70d401f2c0385f761a508 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bd07680760525f243f592a5599ea3654e1097470b53b7db00540c54300c1a1 +size 4078868 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7e9501ec226ebb4cf97af0dc765810994896e7ea --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3a0358d97e0f9683916a2a277d388320f27ea14249260ff409ed26162db7c4 +size 4865449 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b62d251b14573b14f5788d66613f02db0dad698b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f97688d2c723e33e45c6c9311f3bf9801cf2a9d89385304da1b778036ab801a +size 11580514 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f60017d3d3a47ec1221a7bcd92b3a302c0a284e6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7554d2b1981a78a75cd9d58ffedb3680258ff19246a26bfbe7f39172a7f7b28a +size 6759673 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6a9aee931a00cf742ed968330478a501ed1e4ef0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4037719871e01d2b5ccd4ff8167678f91b0df301fec92fbeb1eb95a4a208b2f9 +size 15330138 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..009e9aaa63a01e161146e39eb11a9632eb027277 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a0534f46c175d3660410e580d1df8d81f2704326e47ebc3e2abf19ef9e9793 +size 17124126 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..73b3487b2286a71b9142ed7d8e3a8ccf69539f26 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11e51594f3136b9627e5ab5f99542a16b06a11c73c007390c4b39e26ba0d0b5 +size 4303263 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..61494f5bae6a63c440d898f880fc221f283f4aee --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188ac4cd1dacd1228c182fff06dfca6212036af610559e585b88eff9d0b15f39 +size 3354284 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4ca0c51d95e1475cea17b521107e05079c1d6aec --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb1d3269e092c9bf7da7114806f5870db5e7b9a3951f835cd5583bf5496bf6a +size 8528134 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..014da728e80d7feec90f851e62c4727e02bfcbe3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1cead742c3245efc9bf4949048bffd4283964a6adef84c276f57e4aa9f0050c +size 5042695 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..68280233e4121d833912a5ae5030b4c52f316acb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3ba26da1a37e9f0a6b8fe2b2e7b22d8287cf087f955de955762bf5b5182ffa +size 17308122 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..97ab549396bfc35c4ee33f03ee3e2fdea0e684e9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3445a94aa635959d8557459ed0a3acfff858e7c1bfa3f88c6a4740b244386c +size 13096222 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..eb72c715e8fd02c20150b4c986372b06358bb22d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c25f0de596df25a847415cdf6b064d66ab3a06a98ca8d150759df7ff4c2cdf +size 8222952 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8cfdb0d26f0c51e2c1d0735e81f1103bae48f53e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d588e698a325e5574680d6caf517786519e707ab1cde4c7457bdbafd81fb81 +size 3162910 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3907954e6baf2e2633bf4e8a48d70a9c28c01ba --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d66f3e85ae8effec19603abd658747278e0b17e7a2dea3a7cde0432ef17700 +size 8059026 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..12e4fd3d507dd823128f47d20e0b1abd12f1af22 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0eff874361afac34ee0b50f38e83922898a07b71c90ce7122fcdea49925af8 +size 4778766 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f1dad7a33bb9f16c35d5b6c906605f0276deb03 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa54a85d6eb83fa36e7a4bfb57fe87e62c97916328c5b7bfa4d8ab1c3c1513d +size 10820142 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f8783bbd01b966cad9c48f5fb4dde9b1a4ae2bf7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd00724068295838beb157479b0589c8f6ad29bd70d1a2139b965d3987c1f930 +size 12171310 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c6c5b300912b304d25dd4f78c016642bd231f6f1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81b8b6e91704f589e382b29d2f6febf950b2ed720feaf3b779afcff708643bf +size 3929039 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f11cf64d55f83513ba9444e69982066faed7c01 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b055504702ef7c83dcfd0473f1961a1fa921779618dad4cdc902d96e46a3e7 +size 3951390 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c85b7cefe0974dcc22a5ac19131940a5980b95d1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c021539a937e6710c4d6427455ff0e1ff62e42bbbdefdb841e298b1196f291 +size 10159312 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..510c8bf99bf010c50f1be736d844da6a8f1defdf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6138da8b4d81712eed59a60307866cce2940fcc2c8d61be5897deb16b6294f42 +size 18073365 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cacc78c2944e432d0ac7ce82a3826bad800564c1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5326601678e490931fe46b75c5c288a602975b87b20523a7b0d9424419e3135 +size 13829554 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2424bee5db430bafbd29bcbdbb7d723ca4955d3b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6b8545cec04979df6af5fa215d5c55ef80ef7f8fba236af8676f3a3b652912 +size 7764649 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b1ae58742cf7f4b6b56a0759c1a0a89d739ea534 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a565b77a835dc86914b013f7f766b43febeb6b03b24f135a6a90694e3460c2 +size 5114778 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..884578054d5573353cc2cdeb33161659d12cb037 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cab4a07ddbb80cc341f77aeaba26bf70d2acfd1f7dedff1cebb662eecc3eb06 +size 5959913 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1fa8b948cbd25506442b4375f181863db61d45d1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912c3320cc628a8bf14197642b06bc8c1d996009f37409944538b7471db96776 +size 14613810 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f2aac8fc8b919b4cb7380ed730f17b17fd0f07e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe311af2dd271aa80b249c9788512eca78af17080c2f81e469f815a41129a7bc +size 8639897 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7cd35df45147442923ced1179f404586d8a73b2c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a04360855a76f59ad65b7c23863ecb837ae81e75bff2dde69c153405b7f7c3 +size 29849736 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bc4391cc6ff74807e4a44d96ae6f68257fe839a8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1206bf372989341c4c55ff0f73e79f3e36c96c57095ffc1c3cb08057c90173 +size 22465546 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aaa31f72d3c0f5f3908f47fae2c95b980cc6e5f9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43f5d2753781050287958abb5f19a2eed9352e4422e9445a9b10ed6c772af0d +size 7957191 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db505532e83d71692d2d4ee83831a0315042dbc4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c616058bae40789ec000fa4bc1401de5a0079e65753dbe872c518741a03d0790 +size 13470824 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..982f5d6939cbdb15c770421d8f4fe55f5be2b41f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40934102f60d68412dd140e86b998182d3bebee6e4b857ea79c29af54a0c58c7 +size 19146655 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3b33794e23845f934540f2870a212da7618ab4c1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff849423160b61fd45ffd3ea236404ea51300d0442a90a16ece82a7621409ea +size 24640412 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..49daa32d13d7ef93e72de9b8e39adbd07d5c0c6b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4847ba4d1dfaa27f1b18241e362731df917df43f730c51d0276de158f5762f7 +size 59747896 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3be0cbaf13eeedcc1835f5a3392926390c229fcb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69dcf9d6999ef8018338755bbd96269eaf4a51c738ea0cb341787c160c669141 +size 70578682 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..06d74087f66a4d92188b15f5bd23536a690fa84e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d3d5f0bf7538d376d7da42c9803b4acd34f1ee1ec30c7eff6fed50f9b57b0fd +size 7844666 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8fbde73d2e552759c585acad248a2c08dd2c33ce --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da9ec9b588f4bdfcb09fc58d36e877937d30f97d54d7e2c12a3683e9ed37a18 +size 13328959 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ca2794e6a0e1409e455912becf487b1a058adf2e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749e6f46e75ddc92ab3ce7fcad6a7fcc5fad6eb2db6e8326d23639b48b591dba +size 38094380 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7a5d566b3468c12c79d72b0350f140827e28cbbe --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71bbc672769760ca3ae5a79bb561fd4ff02fc15bc8b0904c04320257dfd2a75f +size 24609633 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b829bafdb67b1513ecbb64c0b121402d409c9941 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a864d7a574b6d858fa79cbe081257c1bb7acc0a5dcfbf96314677187e22f45d7 +size 59922818 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db79608b60520ba10c36bdd6170cd740108ea1bb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19541cea2ef6b9c046c08b940d7b49bdbc98d8e853fa457670c0fe99c5f9e1a1 +size 70858782 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4c04d776d1fb1df54456797f8034658028dfdc5e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43375e35add2a5c417643d0814171e49d86c684095817d90f381ca2867f09299 +size 7635382 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0314bd210f9221253cadba2819e1aa8693fcb694 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd8a7c382d60a0662e4db3d421b73cdfeba38f04f8bdcdfcf03e53fea3aae6a +size 13391367 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b751842615c11cc4b0a59e6b80dac4fb2483be60 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20127b7f382e85beaf3a971b7fff972add804c25adf18415382ac71ecf1a3e9 +size 19139092 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..35299f09c83d2d17ad13195bc5a1967240acc548 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65217cc689ececf56983fe13e4d298cae5fff52a58ae27c3b81335c9714b3069 +size 24690209 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f464514744c9d430bf94c61152272b5427d6d6c0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c771275dff1fee50e22c76624cd373d72622e9734b80909b6190b751483f789 +size 60128394 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..00b5febc1be92af675c97773cd74863a61a3e61a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b210ebaedbfdf026d0b7fd45d9cbd60858e848795c30de7289e2583d261286 +size 71142642 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fe6150faa6b18917bcc4389388a2456c847b8122 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56ba69871a8cddab40efadb0438b12d77986c2d5d3a5a391cfa88ffbe1edc5a +size 7640262 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..706d874090082eb13f77b22d9ee11873a59f67df --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f28bbcbeecff37d123721f49a8621f8324a7679ce5c2195d1b1c6b695d58990 +size 13068252 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a5757c025db7cfae8dd552e1ee5d831c76f88278 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47091eeb69817111d7e0ff52cebcc39e1ba3220c1c800e639782069ede149d0c +size 37406306 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3f62e6aa17297b386ad635f57402f73833193674 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce46c204b03463e41f5e4ddae29cc665828608365c6717cb58b058e47c6fc72 +size 24168094 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e0b9104bcb4f689aef3ab387d1aa333692398a09 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:030012b305af1fd0f33441734de868dca780ef5f8a213f0024b3012235812edf +size 58821352 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c4deaa5ac45851bf0cbfa2670f0f2271eba56612 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6946815a2b55497e33148e875443a97434055bd6ee263f0bc489c74d8eba5b72 +size 69574410 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a77863485204247003f546e9473515ba6ae83bf5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3609412d15679da74b1776782a9e4cd98498101a616d3836d05808cb85077e1c +size 8102937 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d9cc27a187fadc29c5955a7f5f7235c23573d621 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedb00a49bb58b1770679d1046ce5a34f4a7e2bfa9798389496d35c0a310518e +size 13786015 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..29fe0ffea06f091739a60169bb280f3fabae8426 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f9f4281a6acfbb69b85893f8840c13df9a0a502fb2429500489d5db938e373 +size 19659036 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bccb0a02660717e8d502a650e1536cd22c051d4f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0762bf430837e68c5f3fdbdd60cca146eba98aae333c7990fc0012f9d3615611 +size 25302980 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..367ec23f08fdacbd39d18475b8b7a88b689d7d07 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91a21b90d41c639b722353c5aa5a6648afb4de50166148ed3e27338a4360bceb +size 61620178 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cf63c5a6890c17b60136731c47f8fa2f41911e0e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9182d96011934089618f2aeea4923ce422e74ed4914647b5d5fe97c1f62bab83 +size 72928900 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..75180add13bd2dd76c122203824fee259338d22f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a13f8b940ab9ae7095b5c8b08bba34ffb111cfacf566b304e51159b4d050ebf +size 993137 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6cac8f7bafa9e185fc125cad000977d555cde387 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c1d5a29e079317704640bc81e1888389240044115b85f3a7bb222ac50b7e2e +size 1452307 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3c9cd224bc7882ede4b794e05220c4dc3c35b9a8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451ac34044b98c9f5f26db28d8f16ee4bd44d7c539c522d74c2b3c30876b5cf5 +size 1910660 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ea36f1f6c1e7ef681e1b0fbedbecc9a499f2a6a5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141ab939eeaebb7bdd11563c482005cc57e7f7571b3baecc3a92b5f7324c23cb +size 2367741 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..77ddbf67a959b0b32129b39f38a0f4bc57b0c5fb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34571fbdb5072aed75e270ba22b9841b3066e9431d4a21f6098b92d593373b36 +size 2822081 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5b1f30842d4907d8499b9c4d3abcea66c09549c4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_GPT-3-style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cec48a3afd01c645eb0cbb375a749f6c86a45b1e910ee756c2773a8ed8137b4 +size 3278322 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d547a4de266bd3cf593ae5e569aaa2c566c9ecda --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690fbff917364e834bd75674605aa260eadd58013b949c4593a3d5d2ab3599d8 +size 1203113 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b1ce878e0baca92bdffc5e1e95712a14e4725e7e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701bf2d17666b683084b60e3615d0beb15979b4cea0af420cfaf229c978efdd9 +size 1755006 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..90969c29e970c770d3a97f58efb00e5c0f84f1f3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04bb7946742a1e9b16e413bb6c19000536c9083dcb1e91f4bc5a827849fa6b64 +size 2303872 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fee95b3a3090edde90850ccacc1a48bd580e0366 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cb14cf591aefe2a1f98548abd88eaeb15da8b3d35d0fe45432f1b0510148782 +size 2852153 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bb8e5dd2a97ddcbcf902a014f4cd864d715c036f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b700067b87bb8622e9b0c3c442678ad7217da40f84199e1b9f15ae60a977b0 +size 3397576 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1d4a1ba57b02b240ef5501f74bfdc2150fae7865 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6cf9b12aff50a34dc74fb0d2454948d8374d36d4323d5b6aed08175f4cbb7d3 +size 3944890 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1a06af8b6f9687e19431b1059e70b6121668042f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e6b37f98c660da136f4f2a9391d56b83b4a97e1ef89afe880af4e29b590b92 +size 1008029 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b794374dd1f2e88ad36aed663593247cea1b9efc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16dd0ef856505913e4d9aec7e3b1d5a81c5564043226a9ad6e08c3a3353ab45 +size 1478383 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fe5a418de889170847306167cced4cf93e61a888 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7b7213cec208f7406ac06877045affab808d97635ac4d8e65c22c1ffd07410 +size 1948903 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..71cab888419883e7c77e89e04b52d5ad31fbf599 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38355e4afcce64a0a3b0f637776214f78f4eff40ba9b83c5f977efff6236f2c9 +size 2418332 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e24aa47fcc61989d6177476cb6ad9d744c42f146 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607271011f8a70917868d37e3a9c012c51b2951fc19e0fa3796bd318c28279e6 +size 2884820 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8b2ef7a053defd6d5a29407bba938b5616278535 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_can-we-infer_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1e145a31b1241f3ece86687cbdb9ff5d4b09564ba856ff91a46abaec4306ef9 +size 3353179 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0556f8dd9f88de7c7051c1f49d25efe6a9d2a1d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a43cbe771b0296edb6489c200436830ce464bae5b07cb34ee0a35855c787316 +size 1160158 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2a2d0fea04ff6163e789fc4cce1bc0d32b2d56a4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83bb7d884f85e5fcaba3c24dfcbfa55fd61427b95fa7dff5330b3e09fb7a850f +size 1668647 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..663c87944bb4c828df53a6d7c52be7d8c06cd5e7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093b2f438a713ced5152bf12a4410a00d5b0ac6dfd76e1e5e05a8026a75c66e7 +size 2177347 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1f8c0fb9fbaf28e278f43fafabc3bc8aaa3abf14 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3fdd1fba17d4fe83f503faec533f9dcee02ae66de55ed2a9af3e4e132dbeaf +size 2684253 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4a634038ce56d32465183b6d382ed044bc385d4d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fdc7dd4b54c9cdad441f08763b9a592091433a7e875b2105de908a01b665a2f +size 3188262 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..292c511996b53148e962179de114dbebbda8532e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a44bbb97d019855bdfcd95e2239c21e27b2d3e92d02450cb8610ae186f83c986 +size 7388606 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..805f9f324bb232c43f10eb965b2ce0bcd135c66d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5e40d28dcf3587354a6fbd28795fa215eb5136cd8912f3175c321dc79b4988 +size 1027052 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f66cb1bec351dc1b0b69b736e59369d8eb5e4096 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3defd99accc11eac0367be3b325ce1f6b35298e49e1d391e50e58347c7419b +size 1503477 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ca4bfd0878ce4eb2596f31511c0b57e6fa3f8de3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d191942c072f77757b6b697f9e149e27195e3cef146d4c46cd4ccd10615f02 +size 1979886 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..545e4812f020a8d1243aa1cae99492e0713cc672 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3b4a1b55ec33f1ec480ceeebf3dac52d4f9739610b214307734c2a9ee4d3ed +size 2455395 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..58d105379ef89ebfa1c2fde475c6ea4c7232c706 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5999f2188b0b44718d7543f7dcac3df8b22384e6f09e7b77809d00342ffb5f9d +size 2927846 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ea0e1574a7f059dda3a19b42671c22c1518e0252 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r1_justified-in-saying_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ffb6fa30ce5748ad2b039a9dd107b6aa8f8e448ac7af46b93511afdbe8dd45c +size 6804418 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..12ffc8c63505b9b37eef957d2ee99140cdd5725d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34066696e2e9ac920f0f4b70adf77b5273dadee6de3625457759ce7c12297d7c +size 993538 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b8da1b4e2d2c8d3f4ebf1c0a0dee81fd23978cbf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735cea7867b297852a22d5ecc45cb1958778c9f293abbc6654b9a566371a27a1 +size 1447387 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fb7a85b1180c43a171abb85143e5d6095522fa5c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b65d90b87ae41417e92d9b1299715d3be86e4838c275a0440f327b61700d88 +size 1900607 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0d42f2add0c8ee964ab0f5678b11b8b56f84d7dd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6f771c5e8f05e602fb78f474fed2bc2b5708a1112db0067c6eb633826da49e +size 2350213 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2e92f6735b960ab06b25580a926dcd623b1d4891 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c857888a33be9337decf4349d77c9d7c95a9bd2fb01ca4de1b1cedc420032c +size 2799845 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ff1d6407eac1b26c6e3ccb8bd5b8f0040edf5a0f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_GPT-3-style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380631fa77cdea807204afa6ec11e9285031ac65c62c0e9cbf802fd18db2a95b +size 3250449 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b69421454a4796c383df3c5d7aaca8dad477910f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9103b204a6de60e70b180c4209aa5cb5a60e334a2e94ce9ded3cfbf618df4cb +size 2407026 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..79b9effc40be6f7a0ec61081d035d801ea9e90ac --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6600d531936e7ef70031daa10256ab0157ee2e4db6c4008c3be3cb0877b09e23 +size 1750064 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ac3eacee5d04816a04b2dd097a22cdd9d4203da2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc3d4cee4119a73b6d64a3ab77122124994e9b8f2f895e76b7ab68ed3ce0dc9 +size 2293893 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..33a4db213c53efd067fc8dcd50242c83353e304e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f811eee4fd1df6e46cff8cf39a57336c24fe85a34215ec5cdb4e0e5ed73152 +size 2834741 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..03492c2dce8d9bd610bc0df2f6aba84d394ac39a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5bca85d1b22af9a963e2437ef3e1f9ea6a41abcbd1c9ccb1f9c8b396a185a8e +size 3375541 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fdf1650600ef4906b518d2a4370f463a74a59c43 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a9369028e68cb6f3f75edfcf8254e4b79adbc8d7147343ba036ff676573fc9 +size 3917232 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7aadd7e71dd716008926282ae89144364de1ec83 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe84c3ffc30215e19d811e88f3a03f7ef59a3812a4557fd1ae3f2e32b476319 +size 1008436 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6c09bf9a7b6b8dea46e55e69cfdfa522bc14a4ec --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da75649027ff73504f4ed0e65e6f46a7a9ab393dbc495c6e136d710d3b80ce58 +size 1473719 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..68a89be238dc3f6855634d91aeb955279982f9d7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0678d2fe3150e2aa088e139e6de76f72d8fbbcc87f450a657d99cec7a2013ecf +size 1939163 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..906c5918c17a8296401785b65e6c5f40a7eaa8b8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f378a6fc290f02216bc11c90c91155ddd5e0e1c7136d894e3244a95c768b09 +size 2400976 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ff080a3cab3361175523f0fdc2c3529806c0a3bf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b830edfb8f685b3ea8d8e05522d4d775dae11916dbeb4e188ef6fbd9d8319859 +size 2862853 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bdc3710580b88de6406113b25f5c7030e4e03cc1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_can-we-infer_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef477b0ccc57ecff616025ccc0c45b898d3b6abd8a3d3adc06f9114031c7e2fd +size 3325528 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f7138dd93a0c07913b6fa9d349a0c992351c4d3a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c162a59ed6cf28052c0a45acc3c5e2eedd7ea58f8b05a03208b3d2081d54b9bb +size 1160584 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..02cc4d8e6a3692f51dae79a6e440318d6ddd35a6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3737da7224265c2db5b7bfa5c03a1719c6f94ad2d45312502d70995200e9386f +size 1663799 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b0338406b5e61f9973fc94b114cbdbe9b94310dc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261893a355e8cf6a68e755dd6f1747eebb833f8c187f37eab780def55cf41dcf +size 2166896 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c31f91c03af767167376ee879ca44d2bebd50e4e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bdd47b2afcaf56fa13550b61ae7c7c78bfc7dfc4c815bb721c0e8a78c21f75 +size 2666179 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6985385c87545786cc8cddea81bd523faa0a6e7b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8827166947351238474355fbe0c40e4d4c24b5cf0a1c0cdd50a0ecad8b20b48 +size 3165263 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0dd010daa8b0b63f540560560ac3fd668ecfb0b0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066046bf4f77fa0472be0947cbc5dd429a89b9027881c3b28d7a5ac4235702bd +size 7331060 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8586a698dba8044daeb677f86c29e98bd5909dcf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29e23c747f4cdca2470ff0ddd7a6426db51e56e750346efaa5162b548a9b682 +size 1027460 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8429675fb4a2534ebc17dba68b71a1f06623802a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06335fdcec171b20d2b2986c41d9228e73c3bec58ebc6dcb278863ca20d3d38c +size 1498850 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..73788e8989fc4326a73473100e929ea2d496e1c2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7918345d487416fd3ba805285fb6578dc0a334c5194c8eb91ffd98a0c08465c1 +size 1970150 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..90bc435a740ac09bf7d50588c4503dc2b48acf30 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c30e91abc65c5ef75c522737d55b7365d89cff5e38cb38187f2dc8c58992959 +size 2438063 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c681fd0cabcbb9906cd0ec5ea86a7303234e1d1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7592caa60861b048805d5113af58616cb3112fd6e52ff1b70dedb8c52998269 +size 2905893 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d4d113cdd61bfbbe25a4adf226781e48d534fced --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r2_justified-in-saying_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb5c916320425fd1b87025a13b62be6f2b4c101b088d79d9e4c6e7212671ede +size 6749176 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..606117898bf1f410d256831d2714dec9c262279f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81395693769d3f3c6fa99004bdf2a88444f72fb63eba7c4a3647cd923c41d2d0 +size 1169247 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b9ed3ab3435a4c588bbef25b35347a04ceeec64c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4646793a6f3e7c90c9b07caf1e2519da24e36980d9d1734c72b895520125eb3f +size 1698748 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..911aa020a0562853f5a5370f198463abd237c076 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:088b8f68cd56e758de436a5f99d04a433b8bc874af4d357c46e876f61928a862 +size 2217484 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..291618b85ca9fa7b4764352a0124a5f873a82820 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd80f4476a10edd2cc181ab24c7de2b959cee0a7e66ad7e82a436949481a5226 +size 2730644 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..847486a8a3e351f38d6c338cf7b1271f411da804 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341f8031008731b7d91675dcc5d6cc5a5257dbe37649b53aa66cb5b3022bf637 +size 3248067 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3860a6636a1ed1d5ae7df99785136f6396e0d559 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_GPT-3-style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d81850ca63006a707c02bcf501af0ff08b9799602c048d1654c3affc8d197c52 +size 3776590 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..979682c3caa5edce967d5750ff2afc7ba7e9697c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804697655d7e8d9bee8b4aa60c84fe05ce7085b0dd033072c176e05272c8267e +size 1421155 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fe3c0c77f0fd332fb2b9aebbc2f2949d79b9d340 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8436189e45f167b68f379e1b616cda810a1807bc1f8dbcf03908bfc6beea0535 +size 2061859 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..591e74059c6a3aa0ae848f0978b273e7cf7a067e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283ddd7229e99704993aab3bac53f044f5d1a0b6dbb77c9b716816cf5f931d32 +size 5378182 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6f926c1432d2ce09b2cc1efe3db34c5746f275f6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561c3fda1cf4e31d727f61fa4b8d402a2b5cb7ecfba96c6a9f25729b1ede7af8 +size 3311737 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..45e0e78736ce1f1f301661c19575f8d589753d14 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f72470a3fd8fc9da55c8da2c8c2f3701e66e9ae5b73ed26738e2e4b2aa1d753 +size 3938396 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f76bf275d95aa70e5137bafd0f549da7c243d32b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10fe36d9562b19e6431583de8d1289a608fc748e80f428ae0859b2292def15d7 +size 4576230 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..99d79edcb40a0e84bd47c6699f9bbd36c4ebe0e0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da65b62c747ecc9bb18d0e0658a5093f5f702d70e76ed76697c6c66e947b7646 +size 1186975 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7d468dd7b24aa92a88d314d61c841b5dbef2dffd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff419d9bb5d952fbafed00fc306d6aa028a0ca1b634ba004be7b69431c3ea9e +size 1730295 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ca611db1b191ca008862d108cbc58a9e2d897582 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9964c4eb45006217fd481c16efba0a1e2787feef61f56109659f43d652c6bcb4 +size 2263741 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9d22816a58f6542a47c3ec6d08fd96859cd6f098 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c635b38eaf83561f52685d348cbcf9a7553a9afc1adad0238c167096d8cea1d0 +size 2791454 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..96c27579f58e6ac426180ceb80ede425b0b1b528 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139ff404ccbe36ced7507a8e21b25fb01c4bc0acc07a7d0dac5cbf7294065e06 +size 3323421 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..270598b3aea8702d02fd7c029e3acf7736a8fbce --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_can-we-infer_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b5ded8476aad7dfb354195403a74debb8a4612e5851f4de05252ffdda46eff +size 7732712 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db4fccf32c16c2d5a5ba4a58c54f5f47dfefde68 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7caacb39f572f795682dfd243c99052a62da08c1e617dac835055fb31f90b98 +size 1369631 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9d6350c3c037eadbad97ea6a2698f83d3e3e9f20 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe709edafc14e80bc973713053c9e14337943a2d4e6748a077f646830a2a33c +size 1959145 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5bfa366565191c95e6838a038b7ee5ad1ffa7f19 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb8bb75a9a00a207c7bf105750312f25312e9e3d48c877e295828955c3045b2 +size 2538074 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c5148f9abf7267b91f925c0cf492862fce1dde46 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb79c1afee2cd42b772275c0dd978986c1cd09774aa4404b499cd5874305724 +size 3111207 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9cc9b67a4760bd24bcfd223d8c367916f57371b4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54cf064fadf68ddb487490b01feeb12954a9b00a51394329e60a28ef6c0dd3bc +size 3688440 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4e3c5dd46dd4b12a1ab053d1f279bc2cd126e117 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0893441d07f697cf0e9c818abda8a4a54a3a500762126ba36f240903de513cda +size 8553130 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4beb3543fba62dcb87858ab98e03350cfbcbcc13 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bbdad0e66b0c99555ab6c4e7d31ddfadb470c188a7cbab0b8f12473877d377 +size 1209814 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..36a2eefef082a9d7c7e21206cda91a9e440d2d79 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e23213528cef4d3cb477e68dbf82fed041d5a1c4778852d77231e8351e4cc5 +size 1760428 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..830ec299e94bc07ed05eac673ba619b8bf6473cc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428a4a5b784f3ce4380ae0e8e1f236e731a4c1f5d7558bb071a0f96e789215e9 +size 2300963 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c56ef9cdb44ddf7371dee0ff379c05b75bc5710f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08016f03e20ccad91864503a78df5cbc958b96c5fed9c7e35eba578b678365cd +size 2835987 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..040bd1d6d21dc123b91e8d8cae55297557ed40c4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4cac36c585132e6a37b5fae588629bb45628205348942ce1ce5bfe4667da92 +size 3375088 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..55e1284dfe14a948cf1cab278f59f3e4d9a7aaba --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_anli_r3_justified-in-saying_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec785b32db99e980f4384a79b77b6e60b0f5eae9f97ae6dfd399fbfcd6caee6 +size 7850524 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..93810b15f783524405920527e84a6cf4858b1b6e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2805f43fd3458ae1f1306dd45c29583a814703264714f1a0d175939ef94c02d9 +size 1216777 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4597eda988b2bbb45c2f05df74d525fd4d95eb5c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016c8ad4dbdf047c57de1ef33b6c555134efcc4dd4866294a45c5d534ae4c949 +size 1670632 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fa69ca26b965e82228172a7d57f45fca1fa455c9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c908368510c6e7056c11357fded4629d735bc6196ed9291b16852ac1a0a766e +size 2120084 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bf69fa30133920d5dbb4ddaedab1f241619a0aba --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3458da744976d34ddb5b76e48643f7f9c3744f66be0f444cfd19ce25f2b806e3 +size 2577615 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7e2d16536d65ab8f608e86d977f2d2a067a9d7d3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed005f1be215985950bd6ef5061c933b629c63168b22e10c53fd2fa11ba6e8d +size 3027517 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..48ba3b257c5fdb8a8b901aaee5b639d8b393aca1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_heres_a_problem_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6433d404b9924980f12ef18704b1cc0b2817b125156336a6a937daa4cf6a093f +size 3479076 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ec1f9afa1a4e7ff2dec89fa9852a85b940f2d8dd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd75a1a3dee5921eb7822118a94efe4b1f09c6bb0f30cee1970cd08d7333a1e +size 1458044 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6b202fe4afd5f092d40689a5dbcce8af6e201c37 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d249e4cb9160a5f12bf8517a94967a67f4bb7ef4f7b760f5481daf095970a5 +size 1960683 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ee268ca56518d8cd8b1e5149c10b9e07eb0d6569 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9f17ed25b36faa0d73354cc9a9ab33a1c15c132cec72132f8a662ea4f28bcf +size 2457251 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..514cfc60ccf5d2b3247475fa8513dbccd7bab2e7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0254d20d66950853e60ed4e8734b880283932710bcec212e599f1601c62516f0 +size 2962955 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c6ce36bc2062907f2b5c7f77b39d5159533c8053 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1db186be3bdfbf8d4440e3a3488ab5909aa04ff797dab827d36e39eaa2a2e57 +size 3460899 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3aa6aa0c76f584cb0348e97165b6cc3073b53401 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_i_am_hesitating_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f2bf956a893cbe61f2340a606188c70bd3b761341fdf010f190bb313e97920 +size 3960076 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fe9c2678b7b49d29af07365a784837a00c683dca --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cfc695a0570c8910d67674a19582ffca68550476ced57b650182201acda0b3 +size 1505673 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..96de63c8b56f40f2a2fe2397bec535c306cf47b3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c62ee3bc635a5a67eb91fead4609751f4f58dee1f2a4842c28249bd156b63f1 +size 2032821 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..826a4618064eeb96a9fbb76d9539508beaa3670b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50a102fcb96ef249bc70ca57a22a51681831d51470139ab81ebfc758640d8f9 +size 2553759 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..231197af1575878cbd270055bf26bd4689e9ed21 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa8d281fb13729a3cbc2b23e32bbb819312e05f683afaca467c355108af1e7a +size 3084300 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b150a771a51a8211d69c925e0494f75019742741 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be5efa0d513355c71c12a3eb66ad9c1edcc32f44a0236c1a2cfff0a67be1df9 +size 3607051 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..41e7df87fb65bfbd60bc764a47613e8e5e152ba8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_multiple_choice_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd57a1ca26c3323b5f643e54c62844b49bc6e13b17063bc540d4c50e0448a6e +size 4130846 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3650a97494a27000729dfd7b8fbe554fd09d2a82 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9cfd3dc4198c392c9adb7a0ff3b0f4dd88652c913cfd835efa6704421d3a7c4 +size 1202714 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7c76f2d0a01f2ab97f007c9c00830de5e9bd99bb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87c01cc86cc574abc93d1a2e8313fcc0539c90cc472d22c3ff22d86368d81fe +size 1638992 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dcade2875ef5af3e35b9425e0a27c8f1d0ad30c8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1c39e9665d942f878c96495e6f63a40c89d68c8b41f9f464bc9c1777169564 +size 2070864 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b182205eb041a8e28424f0f3d53bf2d13af1b0b7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a725e5f644e7779edd4ab37f242c117de5f2ca40eaef35f2f2b9ed1f42b8c134 +size 2510815 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d2020f8d934e12cc1a9e10ad40db6e9dd51baf1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4d44d5e070bc86970fd4b16ac61d4befb61a33627fa415423080d87a0e4e09 +size 2943137 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3100219f5a994b4c47e6cd4da00226a52307e48f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdc323147f5a97b771b65f3e9e187e9b92009efb4dfb1820e050ca0e80227ac +size 3377116 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a92da5d97ac5039ab86d092e3ca50bc549f65733 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee595ef23c39655ddd6e3c4a6a201d153005207d26a790137722ed191f75ae6 +size 1187152 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fa2afc17d32c48e01f45a28748836aa3c25031fc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e417a4adea797c4204c10652d067c48e2d56109a756a967ec2c90936194807 +size 1557337 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9988e8da7bc0b2b7270a0cb8c46c47f5602b777 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d07cf03a803ac39694d9cfee20cd32663c0cc8e9b18ae74d793e99529903c8 +size 1921418 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2f54dfa13006d597b1a7149ad61824c8ce9310ea --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44dd8ba77bebdb243c9f9c809cc85045437ef46bdb77036e90cf5e5a6f9df8f +size 2294627 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7f01c3cfa08eafaac3ee8a4609e282d70ade472c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c857cc860f58ae13bb8f2453b7fa3cb53ac45f6c66c8db00b85796188b52e3d +size 5320292 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4dcc211ad4ea553702f5fa69b1bf8cdbebc78673 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_challenge_qa_options_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6c6ca51565ae9400abd819628a6232534509466d6c831ec3f9d934f93d8e23 +size 6053642 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4098ad11324357471df46511ed8373e9041218a4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14142227865ed74188c435eb8708d6a99d7a0b4afc2dd8616125afaf5d68f6b4 +size 2351241 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3db26c4ec21b9ffc6d1b27f47916b1f5979de69a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644c9266e232cf6eb0fe0656352d0b5af2f0e80d1df68d10588b5063e0774b1d +size 3174164 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..084f713553df4bf2af8a4cebac7de4a3d0a04bb8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049872e1ba2537193b04323e048bef730e8840421e0570e01bf6ec2a40765aaa +size 8017892 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..58e48fd55b461895ac1017a999b22cf981f0c048 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50db7d05202f00850aa272b96b1ade2faad327729f4bf63486947e26b18b340 +size 4831612 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ebaf7319d13d7d83fc51a22433158c86e0de077d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17c26bb464af6f525a7735dcfcf50455cc2705ad07f42a0b7435dbebd32e15f +size 5662356 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9216cda66d391772fad6d360afa8e03f2bad1ca8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_heres_a_problem_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6de42a037a15172030d7fadad9f289299200a61b3434bed05b46714d56bcac84 +size 6494729 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4676a0ddcab4375d33257f88c071bed26b3799d4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9934a3f49968bec45879b3972ba12b759cca16c7bde66414dca818edf94e865 +size 2745379 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0d03e17aa980c1fd62d7d2a1274ddcdcf2452088 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62bea03c2533608e17b649f52134145788cb8f82d9444298135b0bd19db5630e +size 3648128 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..09889a26d29949cb70c37cfd17480ba1e7a5813b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d7230e51f64e8662b5716d6fa298e795511927f0b6ae2e1dbaeb46960acdf7 +size 4565399 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..75baaae7af793155b041282ef15898162858e5b4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d349121794a8ab47691fb5d778ea6924bfeb0ff3c734b574ec3ceb87ec1b46fd +size 5468693 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..259df8194505a5435c3f64d920e1a2bfe5c58ceb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:516d242a07712afceb776f4e59984472925445b817767f269a65fcf32d106b15 +size 6381138 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e0855d753453f6f6e3e7f3e5b6f90b443bd15453 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_i_am_hesitating_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdaf24768c127817da282047828379bc5920dc3fa1fbf47e6d2d3e0e250e5f40 +size 14586302 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0b288a4101a2d7ecff65a949eb559bfad3dee62f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b8d47c1088724db92933efb09b8a5350b44a58f5446417c7a1cf13c7ee72db +size 2843074 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fc7733e107c3bf80f0ad1a19c5b71b51b4ce9a6e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:909513f24f11e43850897e689a48c13efc8acc3ed2cfe53535a2492cb4fada45 +size 3795894 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..770947c06a9e7ed16a87779b312f14f3d9f6345e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a797273c5c5e82ff707b77e52056c586c915617897d47808e4f24c23a1ab3984 +size 4762836 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8a159e00fdc3cda6b4bf8da67ef55df9ee0c64cf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd9ae56409e67a5c3f975e56042942accc8f5e26dc259963d281cf87db5a628 +size 5715877 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c9dc7a6f042317c28f6b71bc21cc152cdc59e5ee --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e16eb5796be54a53c02a00b51c61e54759d2c8c0e657f5a15d489aff10d2fa +size 6678039 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7a7b96212511c1d2bb07549a35d0b20d14481b75 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_multiple_choice_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7e486dace6dfac9359fa3ca45fdacd2fdd82757c41e62488652f8eb4219ae7 +size 7640428 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6681ab78542433cc50e24d01124312d3f1e996bf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cafbe1dc2a83b59bb2fe2ce1c3e314106769b68c3c10fa00b90132273a76c546 +size 2322732 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..18e67e612861afaf76e8822af2a40c532388c0b9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d93c098b94728ae9027396fe79a3590a37d957645b2de4518fed9de77df1d9 +size 3110021 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..27a6ff299488a02fa7402ce9a0cb5406350b7c17 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59a0b4db12716023d74a95f658d288d49e2d1bfba2843eff6281516059cb9d5 +size 3909160 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c4f91ee31e702bf8ac57cb2d60b9bbae2869bb9e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b9873e05b5049961d1e6ba3e00f662398a8ce34136c1cd12d445fdfc7953b4 +size 4696184 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d6fbac2edfdd00b216e98b526efd12a9ef068288 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9197fa078505e51a1f23dcad3c8887a1981b14f5313a3322c134126c87b7ecdb +size 5491290 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4b45d0061be4796996a0e9c327d204520ac32295 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cedb699c57d7ba8153aad00b27dd7e5a2aaee82727a9fbf6bd2fe7fcdc9c0c2 +size 6288023 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..137d539513343b6e677c912a63b18cfaec391bbd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb706c06f6796c1f4e6e64fcf04cfe0f7295458ef6de2cbaa522d3373c52b99f +size 4392564 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c1ba339deae276c211646a26f2f9caeccf3d08d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b88d294acfc8146df9c1ac427e7fb7cb5169bec9e99b69a10ab3305ef6622c1b +size 2830504 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..837c6628f870243a4dfb4466201e106af22a0db8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7025b67a16bef852fbf7dd581450e5ac1ca45f01354b5290357b401cf73729 +size 3479340 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6653711b7ff1fa32df1b130142f82787444508eb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2737faf1aa317446ca7702f87f59b78ad6d8251285e7ea341d3b6b8ce922a999 +size 4113961 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9936de9605c0090c972a415d6b96f1a4d680d4d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a94e9331798c6a7935e265b318b071f5993b5270aa2b69ab8a7a5cfb565c87 +size 4757690 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..300a949f2b6c3ed60a0eb556ff1bb60f464ae5c5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_arc_easy_qa_options_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b212065218155cc13583fc0da9b1cf2be5fe8bde17ad40cd607b061fbdd3ec17 +size 10803030 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..562a63c6359d8a56f760146fcfee465fdac62004 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2635376d1a08dd6c3be3afb28b11d39a0f05e53b1fcd0a4d0e521201a851e8f1 +size 3642874 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6bc79967b30834fb3629e2ab6c78eb0cc4edb076 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3b62250a7b4209d874d3afb689cfd8c6fb2809476a5ecd41fbd5616de64838 +size 5657528 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..806ccc3b727078571fa720af929bc50ae1d640f3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5289e8e130af9cd1c9fa68852c5642a304d2297ccd738a4a774555b1a01845e7 +size 15389500 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fb89450e94592631cdb6f19c5d34669bce2b3785 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c9cc04df0eb71f66b5e08eeb9979494e8227da3ad9266941ea4b99c9303ce3 +size 9728842 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4433d99bca61d20ad0e066f43fe7ebd3ee90197f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792a982c1eb098cf5ab817af5b353d6f8cff8158055bc6787fe244b66919d43d +size 23538936 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d7bb731348b5f4c0d2886a7abf8bb53498dfaff1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_GPT-3-Style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da6d7ca53697eb005b64e010e7a96743e699a4b52823a8bd65c396c19d5af57 +size 27581976 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..88be3c2f802951e2e2c9c23038b0c1fa3a63d110 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b111ce93ee71fac244c95e74e5fd74b24c027cb42fb49a4daf317924400f55a +size 7969312 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f00a66ece6702baf35a3c1fec1960f977d66b9d5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a9e3125416450574c8759b9e017111c31e68099e6e42aecf59404f69d44c4d +size 6167625 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..11b0ef81eba612a30bd96386a98eee45300b5d8a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76cc9bf1c4770e7cb9045f9f1fe2d090c8f0820705f24bb70089bf15f4ff68d5 +size 16743446 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f40afd01a7636ebb9c576b584a704d356dcc1492 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47406624349753a204261f9afa698bf3ffcea849a39da33b3abdb7d95471bb5b +size 10573222 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..776cd3620a34c792c50dd34154b28ec582acdb0d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ece9c617c55ea5fbce14002349dce60d9d18e92e1296eac5ea036cee148ce5 +size 25562052 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2f361a2158aff4a775923a53a01fc843706745c0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_after_reading_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c47d89854e3344ea9ecdf603e2f96104c4ab0ad8541d8d8b1ffe80c935f35e7 +size 29939830 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b44499bd6fc859f239c1ffe0f177cc609bec5a29 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5810bacf7431030529c22f3c142033123dba2f0ee2bde287ec521c5d6805a53c +size 4041683 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..737ca012d9d4a3ea69ea7916923c5b905838219c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2dd09a4cb9a1d6701e55df7a4b51874c75afe5608575cd873e3f4eba1ac942f +size 6261125 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..61a66769ed37075244e5de7637fc646511ee8b19 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f86398c6b0ea904d8cc930e2d1489fbf85910d0173562a9adc3cd3ed7893b1e +size 17002694 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fea86fb207313e855ff870506e9098fc742d21b0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d71959302fd5db2559d694c60b838d861ac7d9613f6e1f71f047ebd489dcf7 +size 10738636 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6202aa017ac01f2f94b5b2498522188b712428f4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86732added6beb9f7adb11b8bf7e85d2415938be1b6e75a3b8ab01d1322f8a87 +size 25964888 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..19760ed93673d24460c6b0510853e4fc3a651751 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_exercise_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977114f1fd661999ffee2a86d031c5645dd9c270a72ced0b91242d33c7475b9f +size 30414466 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fffc2ac156da861ec9af1dbb9f45f93e732623bd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d432561596150beedf0b73a6e673c3c95d3b7e8ae81442cb061799a4da4bb829 +size 3663791 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d6973de23dc5bfb72eec07ed2c2457132ade7af6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f86ea515f07dbf81d534500e1ad72b862fd220de707315d0f5244f853dc576 +size 5687926 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..274219c38a1a0f55380836818980a8a907ab8521 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c84747cc17bd947bdef6f41e5444e069df034995b007a5362551f5fea23077f +size 7732801 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0ab5a92d6d9287015e4ed9d36de9bf9c6d8a267c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a12fb3f82a1657e9bc7c85685b7562b5148b811dbe20820a3395f50564b33a8 +size 9775159 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a9ad4a13fe37d45d3dcd743cf0016c2f27eff651 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e932e4ad2ac0eff7c0bb19382769c04ba661d98736bf193fad941fcdb5244eb +size 11823942 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..545d0162fc3666e0d2a9681b7975d07e87ee9675 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_valid_binary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf1cb676023b62001a2f143b4c0df1853a4e56f1b7cb36c644287955e634d40 +size 27707448 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e25a03e5011cedff7d22b12d3490ede38d583648 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fcc1220676f129c388c1160982ad7c85fb334c248ce76da26bb3012e26c104 +size 3862397 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1feda493c98a76b978bcf4e824442d4b0f834434 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b631fcf794db279a6daa9a7fab8ad91a6a78d7c1aba4644daafa2f5a6271e3a0 +size 5990639 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bab3fe3259c6c4ff3e5310b49c7179456ec63e87 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535118ed29b6290c6e0d50a9deec592d126fd8ae2cbed7aeec62cfe76d0d5dad +size 16284086 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..66cdfba0c10f08874fd0c18b0475064fec910e21 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d02abb80f41c13ad0d3a5532db4b72ce3aae22a5621bea9fbc520ae80e302ca +size 10290044 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d7e62bd3ea34ba0a41d0683c53793a9ea68f1f80 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d925922fe36f0431dcd738d1cfef41029f81624a9bd0804ae1d44287b597c27 +size 12444746 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5e86e5eaa5ffac2f75dcf36ed43809375ec86d39 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_boolq_yes_no_question_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2c6059f2e0dd8fcdeb3ec15524dce696f4ea663997f56d5a63a946e7d5b35b +size 14580272 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d96a99663a32fc932eaeeccacc7462560e2e5877 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d6126480f6c7250b854a223be1446b3fbc839e24f5a67e46dea97a3fc38418 +size 55148 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..91c027f96b681e9239b6b7e042aae4d30a6ec4e9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8174c9aefdeeff80bbb31a419f3771dcca34843e2843f0e243ac4fc1904994b6 +size 77967 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2e35d4c27187abd622a6f3a3e330de3d4ea55ec9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2475b74445b73042ce421753366f1c2c258f3671e6288892a7472e613b42f124 +size 99568 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7f7ffb40d2362dfebce094cfc566a7748ae684f0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceea60881ea3be6628e2f8e90373a997099b0f45f8e24f1854f791ee027a6d9d +size 120727 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0ee2cf4f5c0244e955c15ee42141c7b81608d3e7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60132610d486cce7773e2a3d937b6a2d05ba66853342a20efb866883aa451690 +size 142847 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e1aac84e95c6b5b6f2ed761fa5e564790d606f9b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_GPT-3-style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7580907e681eda0069aee9b4495073b8f330569f26fea73886959f941f13383 +size 163669 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..806b11a9c8b43716ddc929108e3243bee2b45fa9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09d06d81d146b2e5697d769fd76799731df2c2d70081347d8168f300891d6b8 +size 66218 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..05e727f552de7c8f5595e084f9c4ab9b04849c03 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9c0226b8c01919ce546dc31266c1d747a99ddf5a4fc3b08918c855832e043c +size 94141 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..06ffdffa9f4f8b3ea9381aa40d0a0721f89a8c1a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93230600e94637f6b9bd4cd0ba2d9076f9c85006e9834c9f4eff9f3a8faf3ceb +size 120807 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f706ee8f8915a4b849a471591ff172e1780ef341 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed8f5914dabda87ee2b2d3b76a0d2be686a1e41a7bc5a93000a3b4a0fd82282 +size 147049 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..33c2881cafec8204b9697b4d9077f6ec50732721 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be41cbc4227f3fe3d9db2b2f89e13f547525221154e27e530b9d3a1f01286adc +size 174241 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4666e4c6a92b4d3a25527d2d32361706e002f39a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_MNLI-crowdsource_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4f4a2c4d5c89dbf6a91c30e6bad952e00acf88d40a2ebef2be1e623546ef68 +size 200155 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e83d8e2e1f8f3379a14177ca9956a1652ef1d8a1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923136953b95c338ac56347e22883af4a78ac0a0f7282403ceffdcd541226f19 +size 56292 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..99257294d218e83adee94dec97b0d2a9b00607c2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e024a413508ec4ad2b6e0fbaf3aaf63ac2007fdb1c64b7eb7cc6aa0250dc5e5 +size 79773 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..598853046256e65b11e64cd2f3983726bac64b83 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6afb99f4b3c425d41d24a7d07f19cca3bf0b2f8c9d6ab59b991fc55dd124af3d +size 102059 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c853611f9e70f0008b375578ae1a87bf632f0e65 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cfc4a03eae1fd87b3b7e9be8125a87a46de59f6133d84e80e9febfb3e07973 +size 123868 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..70927a190c71f4bf6546fb24a28fa575b5228b3e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912f5012e96d32261013893e5a735b2ab0d1b54342313401a0437abb95bb8835 +size 146660 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..97c141c43e9b091add29ef7e980cc283c228847a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_can-we-infer_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ac218334b355db2a42b083c07036efe872375525d557ccb2d957df76b6ab4a +size 168136 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ba10c4db609f54cbde95cadbf2f14c48ab6217f4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfaedc428adb1506adec10c1f8047c2fbb8dbeb85da62254c30002d0f2c45b3c +size 63905 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..359574cbe38cafff662442ac82adc74eca6d9ab9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae71f03468e292dd477a7b0ae2cbc114794347c14c8b24b2bb874d06a111f761 +size 89654 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ad2986f83e2f8f8de11d7d4a3af7e3fad7db73df --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9970234dab055e5b1579dfd5457655c964a8bbfb3d34dcbf948d674061aae432 +size 114131 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f13492a316d4ab8f699613218f444b5cfe75ad7b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f317e5e0dda68722ae9f9e53a622b007cfeb66c0c5115711ad245a5ab120b0f +size 138162 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6bbb76c14aa305f5f56b8c5a8b63169ece37bf4d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee81dffd66aa3328540e5e673242dee4bb619830815646f5f6dd18463eea2e2 +size 163154 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5f7f4032bf1ffec692e94261fcb95fd531e8c28d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_guaranteed-possible-impossible_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5746883a232d5f3c40c55b37b86ca47369b51a99357218665bf5d11e49b4842a +size 373656 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..452c1a37919e4b4b3db9071b6e0995a698312a40 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b38d5ee3fd0758567280b777076c19d3cdbf1e2ff90c6eb9d218e503db035b7 +size 57300 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..98124b2b65d8d757ad923f94367fac10c0e53539 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7252c476b2fee5563083786aaf939e906357e7b3e672035b2cd41d81b481f288 +size 81118 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8c94d709ae95f6dfe2e2a4226046361dbe1ba711 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8b4354e1899a09fb085d5d20582cdefca2edb78d4f075367ecec0a538493c8 +size 103738 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c6169e9a97c87264fce39f7b183426cfd47eab44 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecef6c0545176a50d3e91e96a8c170cf0478c89e007e5e48b8edcc5aca23235 +size 125885 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6f20bcd50cd10184a26b1ba3829b57e9a049b62b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce36c564688d4c7fc62f250573c309c1fb230612433bda6b45b7fe283f952a2 +size 149010 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..68c2ef9f94d91b9ced375c25f670319b4ba16285 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_cb_justified-in-saying_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752f2f16a8e2760a2a1f30c73926b78ae2c80ef63c1778375cde35d717f9fafc +size 170822 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..72f9febb50a2482736cf42c365f535efac970e02 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54cccda600ff21fd60a85a6a7ab5c2bc4c68d3407e4800ad078960c7342efe4 +size 92192 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b4df39c61a9bee411e151bfb21b60a67883285d2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3633e5343982d7eb85c25af6bf0a0ca0e856aa39c5fbf14c49bc97a637eb66f +size 111572 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e5b554fe14c7035a21de9ffdab645c7cabee9a2f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592b256e50005d87d04f0cfad4eef78ec8644a95b24026734b17672a4d40cc3c +size 132031 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2e7a6c91eed4838ea7fb359a64ae1ef50964167a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf97834ee4c9536bcc552353cec04f3a72315469cfe862392be23c62717caa2 +size 152196 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..45cd67b44dec366eecc2acceeb9c7e617f9792a6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2459ed220d7d240a804971ad62e833b8a9dfb38f68f3385910fa203a7143a79 +size 172009 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..535f79e220183e77f80cad83f23dd8024a29d41d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_best_option_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174ba916a1ffe8864a8a93328ca0c9a42cbcdc93216758171cee67b26d83d5de +size 191946 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..81bfb92088c74d79b6e03fd6d5e461b893c66369 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6dbd5eda7f592cd0ae67913619bffd94ffb52ec4980d009de153de2bac06f0 +size 87834 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..50b68248a5cd3cd3be14a95eb5294436b1cabc82 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b511863f1fe57a2ca4f62ebb3dcce30155494e904262b34974971b0b4168cf3e +size 105035 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..253c5a90929ade13399fa2de88daee909fc42f39 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347874bc49a2d108ad989a819b0da091cdc055a02dfb80caef3dab526f710130 +size 123387 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a6ff281a701728eed8d1d6ed1230ecc528f18265 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a182c195e0da83708011638b601c7679f56ae7ead6d4c202b4a9c51a2d165c4 +size 141397 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a992e55738731f8cce5e5f26dbc4584cf7edc298 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bbe9c7f14149261e46a778a063c40f59c92d959bd2b8f9d333f4ec47d0600a +size 159057 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..87a195e10b62f4d2db198ec3a74d2f8eccf6ec5f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_cause_effect_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d26378f5232389483b81cdb7b164e1473dff2089ac9c3cf6c010083a083d8f +size 176868 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0569cf322d100e0aec1a890c672ae0272f3a331e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670fd952766aa0f498e13af3f95d14c84cb48e4a0dec23277af3854745e4a4fc +size 85151 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..aeca72f9bdd0a3cd27bc8c5512e0ac456f6c382e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e15b2c1d75f3c93e5238c13db2c695ddd42ef08bc512aa180059a552e56c39e0 +size 101224 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ff1ead522b0d40fdd7712b5c1f13a84c961f8c84 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:336ef6b26406ddc3f3c5fc3e344f9e77408f4a4c585c1f67e4c1de00a0d88bf6 +size 118445 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0d4403ca5848a48b497ec9443ef9cd1375737897 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284bb1aca6e5407a90adc507681af5a53b063e93ce3078029175404241f7111c +size 135375 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e695d84111b5ae3a0bf6c497e33d3692f0c723b9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a50564adbadc39c63ad0919dca253b052eba588ba42248f9491bd4bf62a79f42 +size 151912 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c2065f736d6a472c099314252ed4e00e1ed2e097 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_choose_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f495b6363946e5850294a339a05c9eb63b0fa692419eadfc773e66eaaa6dd0 +size 168715 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b85125c081c95c85c376f2922da09e325940d631 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad633c0423fa2605279187cbc0791fc9f2657674e245198ec4661ca791a3e7a +size 96729 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c18ef95a87cd2313f27b9e8ea8c4bd0e3e18219b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ece21dba22c88469128044fdb7e5c1224fbbd8897b8707e17d2fd232cf11a131 +size 118236 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4685cb1ec335aff6a7b4bfddb39b452c2ad3b965 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c77dbef44cb6937ad48103bb832b4b4f7418c17adf358f34851a09c7c43a39 +size 140897 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8bb4908e75af7d45b1ab37213b3ebbe6313ad96f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a84ddd353d39f4eddaa45f5fafe6cb7d8987d89f932429c71fd83792fcb90ace +size 163200 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dd0e1260d97e8399402fce578acc6dce6b8329c2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b6fdf9bb4b9fa277a8c6bbdeca42bcd495a96dd3f26e3c41c3e692b6272a37 +size 185164 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3a975bc93ce453ec6dc3e999295c2aa9b5c69964 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_i_am_hesitating_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5de88c5c5d0c8c0849c68363fc64d36626141c0d35c47da2813e6c5d1181071 +size 207271 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76a5c4a77b546ff928649efe6ebec0607063d271 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31259401475e8e63826e3b1b59e61258ece21e78ba1b2b18ade857068fd5a649 +size 95748 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c4e2b78f87a9de958a81ccc1898ddb74876e146e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2ddb45c774150a5a477263510a32fca2f5702c20bcb8c1c8b41b0acfb1b580 +size 115630 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2a8477b8391f4a78c62cff5c303a6d8ecffc869d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665b5e0dd20aad0baa5553039105743e2c919a69aed8eae95fab076f4c4ef2ff +size 136656 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..89c52c74c979caa4836cce3a0c5493accdc5ba45 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78b1496d1ab687cb8f5df6a230145714bdc6e5909d61d9a6b478e890d21a9f6 +size 157384 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..47fdf9ea77e2f2cf4c2a08d2a45ff14023fe29be --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf65c1233f6e4e01dcabf94fa59c621b19fcbd8fa3c4231dc6618fa192765d35 +size 177717 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ba0da0d846581e5aeacb9278276ba221eed31b30 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_copa_plausible_alternatives_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03101b95bd8eedb706e9b009c737c7272f8f2faede384ef06949a1c0f946240a +size 198318 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6a7bdd9c2059704309571f8433f8d4bc1f7d2aef --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103664c00300c8c638ea7d90c50a248bc3d8f3bb46f39f719b48239ff79e224d +size 3526820 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ec0e87371a04d4f2997da7e645cccd07db122a6a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518585adf041fb98316a2c93d18c1aaab5636fd80d4703644fec43a403aa249f +size 3931769 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..22afe4a23ffbed56eb5d19c5f17c402dfd993e6e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e300ed577b41a3e2c72616adc5a5216487f2f2843659c3e618d955749625d1c +size 9827000 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..672bcb8ebd3a9ab035c198bcd5a5629a4121c309 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c00263e8a097ead37ae750c9f6068b9c946f857f427bc591823e3528b055a43 +size 5854999 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a3f6bb517f71180ebcf00d4dad8fdf2c2146a750 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd9ee8fcb30622db61524b1aa15dbfa86e97dd9390981beb3d9536be730e3ea +size 6791525 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bab5b96edc78240be2fd1df103637da5d5eb0433 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a55ac3619b245bb62e6864e7286e4515c8eb14028bedd8a76e4f000521ac5a5 +size 7731464 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7800cd13e772d17a4a1742836ca979e21e23487d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b66119d19c479c964488df87152aadd46b8dc874611cba7afd2af4978dae5d +size 3273681 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1dcfc356ae9bb03ca657fa8038bca752a89143a3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c349e4e18aba36985688373b30a1baa5c70865b02b920ee49f061bdd38164ff +size 3849511 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e4aaf7bbef4ec2ff43418cfc006cc18ee1502ace --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23110f52a22eb4a80d925a54b6af21bc2a0401750e92659c3406301e35d1af92 +size 4807979 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..684e0bac2aeddebfc137f2cf26a08ec43e2e1685 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97a0cbb39d335f25d10efa6962fd16ddf174d0e88ca519979a8b045fc94f557 +size 5725991 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cc5a7132678ab2988d76a4a03505d77dfb0ec4ab --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e235b9baeae40c66b53bf7ef18686e1b4967a3b218790075403ffa3995aae90e +size 6630355 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c594a1f58fb5d028763ab076f3302fdfa941fc0c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d1c9cb86df5eb2c593549e8ec44e055e71a73848db355324a04a97c48aae33 +size 7543652 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cc7bacbf79d7b0d50606781d8a18eea5ee23033c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907a0911f398654896b88c1f904e8b9a1ade897ef9ab7271f4e2aee07e75294b +size 4096427 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..879dea514abae4ce917bff7717d4967885e51974 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d55dd95a802d18582eacd72b13ffa88321a775efec0de5680f1555ae14f4023 +size 5139552 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7bae856a2c61a6b57bfb50e90c1d88d59d3e65c3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc6e62aee61be932706a02bdf875f0b69eb717e8788a6238f3cf56c52bfcde2 +size 6158236 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..29e2600622af86a85bbce82cdc1020a60ad80450 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ffbb65bd9ee3561bbdcc3a14ea3a4e4b216a54b0ed006657a6f48d669a3751d +size 7185060 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d48eb66107709d5b55103dbd668af9800d49f5c2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee77d1981230b81ab25ed983d23bd786a685c039e37dac7536fd1cc18cce8760 +size 8212984 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..04c7a3db7fbae4a494421610cdad44a27bb84e13 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc620bde7a946ebba990ca90e90a994586860c2deba67d2d9f7b77b392c3b150 +size 9249966 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..44c05e268a31fed8653bedd49f53357429ed3db6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ce82c1a0b25a33246f2d985b1778d6b97a095ea2fc52213e952a5b39f34773 +size 3964402 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db443c5583763c04332f7536444c49ecde774082 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24043b883836ee436a749efeea6d254621229ebcc4c8db26a55eb5e916ae9fcf +size 5030863 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2facd01092cc7eeff9cc6e1c79a68822b73c85c0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0c3a4c73287c153c76bee8a03536d6e90f1e9580b6f8d19ff8a18165ccaa6b +size 6128712 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..99f737d639f5cadd2bc37edb4767b1c32f56f441 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bf78d79bfb792bd4742883cd251914c762d2f0da2c4cb85097eedcda3d73cd +size 7206106 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..73c463bc0b50d7cae91f476f24c85351fdc25f49 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea390f28dee0f80822c18b23e703cd0573b7563893beb1dc5bb784631ff30a09 +size 16565992 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..49635ef4d4e8b303a808f38973afe0cf6e760b3f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8f1f28ccafd17f64cbdd1bddd829e7f030f752c405215cf8f648018a677ab7 +size 18744026 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0a391e857d060fb0e0f3db2bb08a4424df74366f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d02d9612d2d0ff98c7d5a8b5cbbcd84ce9539c7cb4518bc6433574b2d44e20b +size 3075111 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4d7dd8616cc7d3a19e94587c203820ce98da17bd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8a1efafcb22544c9cdb59544331989cae824c8288edcf10451f33dba4c0692 +size 3498289 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..764b0c9e2a9e0851da7967acc8800d3b17905ffa --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11719b678f2df0c75cd8b9471dbb1f0faac28d4ff4876551510301fc5bc27b7 +size 4292207 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d9109c3ef919830116ef881ce129546cf51c5501 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:379181a3fb85315410591392345535f674cad29a86e09f2594e7fa3e0ec13623 +size 5066531 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..57f67a83d6cd1fbf768374f019eef52b345c183d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eac36d8da40c013337ef3200aa4cabe416855d8181c38e8d00cc0926ea002e3 +size 17561457 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..06511696851cc4c8b55a875f57238bfae00bc5a9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_e2e_nlg_cleaned_text_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4151886a8806cd36e0134e8beface12f2118d030988c3913cfd1d4cc81fd769 +size 6648397 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..092eea568f53f4263d79ba99e92c84d66f4b39dc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f849238cfc9373858df565b00032013f64578a29d89a40dd5fa1afa51c5d6e4 +size 2876639 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d7f65a1c9c9a3c1502fa86f4bf68f32d0c35aaa --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fafa1a3dc5f75ab14fa662997493017b6b0b92a37b8eac3ea4c3c5587c47fc74 +size 5092501 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..967eab13edd5a0bf49f586c23f4162575fb98fde --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b4f6d862b491214836bb32732da9ba4e1bb7d972e817a42363976167284559 +size 7390345 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f4e9500e522edc8f9a2409fee31e4acf9c52093b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f7b02d0e0abeba03f755781ab68d579d56635a1515a7174ea9fda19763c3695 +size 9693386 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..61ada3f751cac5ec7dca7cf1852a1bedf026b0b9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2680dd70ce6d5dcd7edcd441aacec95f271ef05270cb03e179d1ecd04849bdf8 +size 11803584 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8da2f42886c0c81270aa9ae554a25b4e516bfed1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8635a83fe27bd743a27eff07541ae707c2ff0b40e34d9931d034aed911d2dc25 +size 14076806 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..44d78252f8442fae32bea9b82c68769b2c4da233 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fcd0a73b8adc33c496959203e799200ceb5d9a609554b505adf5d6f2a67222f +size 2797418 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..10f7c2e01ef247d6114009ae3d4536297c1f8e2b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8954a1f1203522d2cd97930c5cb4e7e72f6b4f3dbf15830fd7d080be6422e3e3 +size 4970131 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..78224d09ecbe1ba5ee6b97b63652bfe0485c92d7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b320c56a659520e7a38ec35a223173f1f2caae11a17e773f2667bbcf4dfab28a +size 7238664 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5699c73ec9eb3c0666dea88465d5234afe3b7fbd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1b90c4bd5083b0f677dba814d46ccafa24e946f0132bc8100d93f78df3306e8 +size 9494684 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..21bb980a0276fca8eac9f061c447bb709aaf1ce5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fed44658e759960a051838f45339cc254d96db3d22be4665c7e5907ddaf824 +size 11564040 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b6c75b08b830e4d94d1541c0f2fc82c32089ab6b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_DOC_tldr_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530bea0110597f582837f4267bd64fef42cf262146e206f3b17756fa6d1123ba +size 13794772 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3cb7c8183592b20724b1f452ebc126f4a1172867 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27a867e653d73dbd513b51e6fd43e14757b8f3f86278cb7945bc8ad653af026 +size 2831745 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..35f0bb4717a4c0280381a9b734c9a4bdbf6cd9e5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7472d4acaf0f5640eeb0fe9d119494ab273e2572a8427dc370b22b56b52743da +size 5027321 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..88f91ccc24e1cdaf2d0d94df783bb61226493a1b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:307940e008128e9224b2e72aade1b67691603454366559ec325db6c8d6f312ae +size 7300668 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a98c71339c94f8a14aa801608e2add3da1e160ce --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80c0749d9fff5ba5f156027aa3ab095a0be9c01c111418c0b3305f9ad23fafce +size 9558981 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..942b68ab3bd72d39c1aa55f773265158e48daf0b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202a6513027cbaef5fc0d1da0d1960178e21463b0e985767a88fb13cf7ad2ee8 +size 11648921 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e7c28b0bc823b556015d6bb32dcb83f72d06f6ac --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227f723526cf1e633fa4ec057b278381b7601ca26d0aeaab69e84a308690ae6d +size 27795070 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5a3f15216ec04821e6d215ad19d8f18e5da408b8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad57ebc2bfa9e246b366a77f5fdafe3042f722f5f09026a6fd92cc1be209a70b +size 2816759 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4ed794faaa079159d7ec2afbfa2e6730cbf1b928 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5371db2c2ce6786f53563a31b5e1c3a8e25930b97d775c8172fb38c1e8f579e9 +size 5079872 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0cced22c21f445669c18564ea73965dd554739d5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13b2f6c4b672be604e28a7f9697a1c4263c39999f90b66686b2a7d377df04f7 +size 7356702 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7d243545b5208c5b1c333e27f2e078656f388e2a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74927f40565e3ac1c06bdcf84c7a356273b1f18e0dcc0ad96b0c73a590af19fc +size 9621300 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b2c83b62c1928d296a1bd1efd4eec1a4a54fddaf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a009d1cc8431dbc79c597a4b1cc9ee33191e4159361271c6d0bcd6a8652c7c +size 11652675 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f806ebdc16f3fc5764467b941d3eb85e4bd0971e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_DOC_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af15bf17569f5534fd357a9c36824d836b6089698f07decd2bfada049f54e433 +size 27764780 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3e4c4e9e917cb67939660a8ace198b1c045a1c3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1449cdb8ab2dc0f112faa5159f86403dbf85202f2b0be7f4ceae8ea23a9ca6b6 +size 2871565 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..369f5de13ed8dba5968eda721d00e42f6eefa411 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74d5ca2ed646eec0785213de36ac07b443787d86637d465405784cecd56f798 +size 5114527 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..de7c3c941fc570f94de555bd531fde93ec07931f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36e7cc92a279cecec05828c597031993e43f783abc593599616f611fb8752d5 +size 7394365 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..def2b1fa2fea52c36dc563914abb0393ff6ff455 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e233ccebd63218083c71e4e496d2c4648226f07ef82dd8b6151118df9a396f1 +size 9663369 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d5482cb04596f37150c79cfe5a65bb995ba323fd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8dd40a5807825a6e88643f30a9c7e32c2c398dae3900a93ca39f51eaf2d66ed +size 11751779 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..57b8a8a7fd4fd3687e7fee755c5ab89fb04a331c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3920f46c50cce3bccf61a507e15a8cb5e71f0f5e3316355205d1ae7932f1ac2f +size 14018647 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8a09b4ef68a2abaed5f7ddf1e7bb367797588b7d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c3acf3b088f3f822b16b8d785755a80ca682de05cdb3eb76b7f48659636297 +size 3434976 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4f84af513937e7df19e44b0378a979fa6fbd13ea --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93f3d5e15a10fab812a8d8b463f172941a498c5a603c32ab41d0dd5f4d87144 +size 3327689 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f22ec5c0e6ff798a7d6706fc450a7a4bd347bb6c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d27a6f4a3ebe3442c2c9b8487f4f3a8fc60fe27f46ef352a8d9d20be0889aa6 +size 7693162 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a8f7cbe83031a13ff2b2f9687d0c40f70d490698 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75234d90191aade3cba6110a1a7f53ae1799725425da57299191c070bbe63a6 +size 4447276 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f8665588f9177902c407bad5df08c038b992aa2b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff13282e97cfdd42ba761f089c0c84e3e875d49ec9501613040c053835ac84c +size 20275216 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..137683005010bcc187e9d7f1850ddd9d49d408c3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_Correct-the-solution_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b5e00a5cb220774222e723855f9292f0efaf39d24cf919db200343a6e7cc39 +size 11452982 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ef5b6618a86c8a066b8f5fb89759af4242cb38c8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d568dc0695d0c0c97bffbf34b69759c2059641939832fba48d9b3301c0b3511 +size 2115935 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3c482c3df9decd346f5fc00abccee2b98a02e3ee --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97bf18b2f511bd1d461a4f115da409b0aa8d1d71c9f6a47fc844359cf871a4c1 +size 2900909 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f9c2ddfb06448afda13d7445c2f36f1d54ce5a4e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6334ce2e3eeb7d1d69a18eb6fd16400d982cf25f724bbbef50c646e43b098843 +size 3683911 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..90c747466e03633b4200ca29408ddb75b1b3be4d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de8a8dc0f6d136b9ab5f6d439f423cb5ec4b2486905c541c4cc7c7749f4de81 +size 4458372 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fcda23fa82153b5e979b98d1fe53e1e9d5143073 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa4f7f5d1a26e96f920cf1aa5b986e9f369be141fa5884e97401508b5a27cc8 +size 10509032 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c33e1ef8ef9942792f09fe19d762d1c152e24bd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e9e5678a93d5d32de208b87c6c37fbe17aade1a41cc2ad375f43b0838a2c8c +size 12081354 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8eb8d42879b33efd20acecec7c3c5755cf3c70bb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f04859b0ce1585c829b5db8aa71e116363660136b0a2bab4969de684de04542 +size 2794872 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5b8da1eb5fa77626f6db1ee8272faeed946593e0 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3966697592b675c31844325ab3ef2cca0b0a6378879001df9c79485ad611de75 +size 2220287 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..79af327cb1993b74ecb5fa20f16406f4d7ed101c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7f5161344ee31adf5a9259b6eb82b3245deda8f28e4da630f25e32a03d917c +size 4010296 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a0bf0a95c643b6249b50e7326106a617bcf2a45f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d44979c014c8793bab5e0bfba7756ee8789739530accb1a072e1c6a601f29be +size 2130376 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..abeb8d853e01a01f1a6690f565bae683e74245f1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55afb9158603624d01654ec5449b5526547152cca3561fcbd591bef3dc469479 +size 9600512 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..df09d8ea6bb2d977311d267fcf606d35f467cf4f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_no-prompt-needed_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e46b3551fcea7a966ebb015a1a19fe86472b03f412b4fc291ca0b8f48d8638 +size 5338182 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db38eeae18cb274507f443e92f90086d85bf6131 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe6d1ec7b37bae5b3662ab78623e555447052a034a47a8948c3965986ff33ce +size 1864129 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9f97549207fd288a9c9ac86ed7c644913cf07e4b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ceb9eb6674e08f1e0ece2f30caf709e6246c86004982794a6da316252de24c +size 2557203 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..244b4d7ca5474352a1fb8d42f648c23f03262d95 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc9a6daf42aae1f45dd9447428bc22d2bf83c4dce283fc8af663b71c46696b8d +size 6496610 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fa4aa431e4e02f568c577029996963df9c5629d1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7413fb8d15e43140b3fbc634d3ce79de3a58f97b990cae79218b7a5282a79749 +size 3930866 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bbec448a8c683141ace6880b9b83f584c7ee8b08 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348a2281b29ccb9ad67d8f97965170399a5e45577b11c2f1548a9c496fc3d254 +size 4635110 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1669d8c21b3cadd508dfcfbfed5ecada9120881a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_pick_correct_choice_index_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba929492e0b8eb1e7d9ac1f3516412bf49ac31b1e632af795fd5fb061468306f +size 5329371 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6a893d853114d44c4ee10ee9045aaed6af4fa8f5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b8a0b155d8da312f6fa0bb930f77675e05d7659329ed9391bf36f169a70e9d +size 2257428 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4661820c077f967820c92d92fc7b6189bec4c235 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8cb1560e7d705d28ffa3933b9c98942b4d40f86ba31c81dfcd791d20f6c3994 +size 3017469 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3596cb3cbee432cee602d1817ccdec3a22fdcd64 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208b76644b20d632d8ba1710331ebf73a60c939774d576a18febc2c25c0abdab +size 7547344 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2a091cb00756be680ea72e8097107bdead00ad27 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ec4f0743d63d070f707e44550cbdcb7c6c77dddd4dcefd3f0779a89a78e1dd +size 4518260 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0d19f75e32a756b86a66602e47e8824f710dfb4e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdd2d84a17cd76084fdf3adbad68dd441f50b68c7f93723551c61c278e0a1a9 +size 10586362 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d26133bf691ab75e5115c52f370beee8622cd159 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_piqa_what_is_the_correct_ending_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea34f76494e7e12bdaaf57264784bb1b10cf733c1143a0f142bf0db5fba58e2 +size 12108580 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..49e42788862569e9fc3c1784f7299c8884f4da3c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d552980c3d704d2e555170700329e8b25e2aaa5cea4732726dc0796817593b +size 639690 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fe911d21ad8477c8d28bb1cf053d25966e426105 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d97c3f1953c7a1efe0ae92d86b25b85d8b6ef5ee2ad42fbc64caca61847b23 +size 754663 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1bd635f3f9a4d3e327eea1ce11e984038ac734b6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1000decd81247bd6eb3fff78f853ea6eaf335da86c31e1b82ab229b8ddf201a1 +size 870936 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ea83c9fe374d19e67bedd31fd82d236b2be0d8a1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dec33d1b1a0c9629f10fbc1c152acd6265b608834994de9df7025f1182bef6a +size 985361 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bda0ec0c78ef44176c101479fcd319d4cdacc529 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0508c4847d293ce31bfbc2ec2beed57060c9de322e2800c248fefc12dc4ac202 +size 1098034 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1453bd42fb3767ef85a8ce862a7d5c24ebeeea01 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685e5947698e0944ff2b3128cd31736d52d52dbe559223d054928fa95488a1d1 +size 1213221 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..330dd328a6a799e7b76f21f04db9e46ec51aa9b3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c935652f7600d97757d050b4c321f3b72fb73d56ee6a82bd7105522ccce78a8f +size 1182257 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..614ee4cdbddfab76346300ad2fa447f86167aa5e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f7ecc2cb0a3ffb9a46490eaf8c03a1d9e6f62e728f9f6275aa05ab5318c5a2 +size 1779078 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c70bcaf40fdb1eff22246f540e3967ee8840ddc5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b589d088e01c154f70375af01d436ec9f070a848dc94fc28d794569799a4087 +size 2388274 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..380b1af9e2096be1de4205d274228fb6c7e6f81d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb5a83312d28d4a27e32f7429dc3843fc60e09a80a97ca333144a64912674e4 +size 2973244 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..40730ac33514624e07a1a7dc7cb6bddbb2dca70d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2347cd80cf23e7e6f8738da5cd7e72e061f03932a268ce1e99552549733aebc +size 7111284 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..067e39ea731fab1ff27e42956f592c415deba2ca --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Direct-Question_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b22ba4e0b9cfdc78c620eed2dc8b0c90c103af77f5ff5b498b7c93db924b288 +size 4144456 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c2cb990853a67db9e7097583bbfb55c23de07e47 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d255a7b7752f7ee9c28eb56df1d967a649bf17e164f14ef1c0586016b5e363ac +size 1328881 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1d45f715564dbf8a74dd585801437a152ba30fed --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b5981da8c0bb305fcb762d0461fa151749c60ac1409a410bdcea819a24a650 +size 1524460 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3f86d5a8123071d283f7fae5ad6d57f0b536264d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0728a1c76a6384123be59b6e663acd3ef374c11552d201da235e5014ce6cc47 +size 3441074 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..cea32419b67c14908c9c9069f0e01490b92a31f2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49cff6c3f2820ae3834dff441a05ed991c94c4835e13bfbc874abbfec75fcc71 +size 1915623 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e611d301f5cd14051a382beeac7bab3d5bc4f5e4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:503618147d106fca98ee599c86611e7dc6d99240205bb844bf0388d6ec4d73e9 +size 4215074 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ca1178f0f32a5b9809a84275a26cdc9e5bd9d4e2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b001b8a9ba1effaf90247189cde7b06001c28353d43f0c0609dc624cdb3a45ad +size 4601914 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5219bdf8e2a08d11f1579f34c83c9ab3e29a253b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c48adaba62c5d06406d1ad46163542201c534432203501c52e0c4dfcfe593d +size 1935083 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b641911e7de95920df66bee49ccc85d042226e03 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e424efdeb2e4db5ec9d8b6c17ae7dca915fbe1b8fc693cc604fa39c1e49a0665 +size 2636383 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..df5f030cd894aad5517a2701c77a84469b1edd8d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a05a05033f8b00076bdd5508893daf311f0505ca58b12c92abcb1d54e64844e +size 3349491 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9b01a38ccb48e9675da971a12c5fa6d62d76d661 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c831c72dc69f1d83ae7eff847940643482ffdb20dc8cf785dbb0c5ebcafd8e +size 4038994 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f55276d39a39a91405f2b70d9574efc66fcfe6c9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dec3378fbbcb5d45945e3b93885e8bae56eaf1fd2856639c42d8751ca097716 +size 14173296 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7f170891e34f96f42089c41827dd1bd836b0f5eb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317033aaeadec9733e6f44a561fb33c4878bffa48606929d2a578beef0c03e3f +size 10831154 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1ad924e0911467a8fba0bfeea394dde9cf734cc4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b45f0e8fd566a601671dcc94567cd044a27790694e6b68547c769e917463f1b +size 1869995 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ccbd369593eae033cc716b92fe31892ede29cb07 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ad6396bd9aa36585b9bc9fc8a06f93366fa298cb512a3f0aba51c6140dee7a +size 2545318 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2bb850fce7f13a93a2095d555eb3cf4acf074608 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7064e38cfda514a37fda98f054cf05b5da75d43cae148bd71847575a16da64e2 +size 3232418 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..76ffc60f2a121c7ec17091badbe130b05e92939d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16ea54589ed94cb904f5201888ecc2266d369408555b2393e2b0c0eeceaf88d +size 3896122 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8135745fb9b1367da2ff9f3c0a68b2a251132aa8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d16daa9de384b8288fca123a0f214f231fceedc8ead92a9efa4bb2b955982e2 +size 4555573 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..95da110d74d05174f922f00ed10dc34104689bf7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_sciq_Multiple-Choice_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1673cde1005c80d1a04f8351d25652d34fdcb6045301371f3f975040632263f +size 5220617 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f30ae1c40aadc6d7e3f6f6e900c281623eac3a05 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643cc0014a4fbef7680fc86fee92c515d7823fa97220e26d33599a4bb9a0f09a +size 2212081 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a6e1c0dee112fcdc63b31966ed27219e4dc25ea5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2412cb406fa5d52a45840f6ead344117255de93004a5d9f9b3d338b513eabbe +size 2972394 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f14844fce1d1e91e70a5bfc18c14b4921fc02ae3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b948afb36f031f6443ce64e5b2f920dc7b716a4aa6333200cc33ff018fe4b792 +size 3724140 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3dade20c0445822dbc22776697f0e70bf47b54ab --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7333421bee236f8797381c0ba74598a762568072bf9ef992acca2ca4c69524cf +size 4475631 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..422b87fcc3c96cb6cde43366a1eb9f40e51e2db8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448e390800db3ff492881ca7c7c745bb1054887af2e26984f6cc2db6eee3b2d3 +size 10455326 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..68f994b10143051606194f5d374ba666cb85aebf --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53ff2a5b197e9c1bf2df38e79762807562232a4caca9905ea92b2fbe26abcbc +size 11956918 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a2fb2ff582318d43f289eee0f0aa53db721cbafb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b67f536ae678f0a3e229345a02b9d049a0ce3894271dff622a7e3fdf718994 +size 2359581 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e46649e45e7539c05b22d797672437aa34152b99 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a052f8476ee8431185056d74a809db608ba9e504548fd1befb6cc9f544aa43a7 +size 3190666 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8121461b7b41160801dc53c2b52a23efbb755127 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f080297fa94aca6a736d1d52522378ed766de3abd1759f9bebd510dd8f0abea6 +size 4013822 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..81e7264c6f67108354cc5bc8f18336797a6bdc8c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe49e6b47ce40a6c2303c39e98f1330ab37f26662552765300a57d0af40f49cf +size 4836487 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fcb41b47ff9874a6c1aea0fa710a54dd284b1dc2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a13b017ea8b698987d60c0a231741a0f1cd42992ec21f97821d86a353b21c81 +size 5659755 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..991f3a39460dff88450330ae304d70c1994a4475 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a778eb22d88eeb1e57c27230fd8f1c4e246db5197b8254b7581c02cc1ef0ac91 +size 6481806 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..154339bad712d9f4ee8253ca2ae825be9e6ee1f2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f170d0784e19ff087ec9b77b3c0e14e8aece55a8c3cdd40db2cf2ed9136a20 +size 1877837 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1a25d40ba19fcb668b5f309f62d450ad8aeb79cd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67bfbfb262463e412168d4323a01e1a36b33957591a2527ae20a3b8dfacdc6e9 +size 2433701 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a64a56f2cbf4b2b61e3abd2ad956fa7171ceb3d9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4285782297da685e983ea7142883b352c63cf979831a20f03aef1762d1d5123e +size 2983975 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9acff4795352ae334c7e08c8d1e28e67f0037cd4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055f2a9a2cc3c9bec585204c9779d11bb68195b5c269d06a3fadf11839169456 +size 3532410 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a50e79d60f71a9bec23dd0b05567b2c66f68f49e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d01845cd2415ae6d5f29aa1753727222b41fae2ffd06d456931c39bada4e921d +size 8161748 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c00406fb1156654c3519e8b51a7817d4abc0ffb --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f193e9b649b2723471232cceafe7f4febbc1606152aca88d5efae5efada0843c +size 9255650 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..eefd00d9281b1f971eca468e7fc18c02e5c7c6b9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa019f119b3ebfde3d99d8931d94b7ab181daf5998331f68583c6d33e26461f +size 2369153 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..79c61e02eaa000b4f9df0709cf4b06bb2d89624f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6652d5296342daca65ebc2e94a94b13a9858baa99d56c80d7065e42f5cca37e +size 3207774 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f9e6d1b8cc242bf390de53d3a39a68088b54d069 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcea09ca4c3c85a308088543ad477d8cbe005d5b00ac5df5859f95dcd914aac2 +size 8076952 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9c234cbe1e949cebbd26f39a1a9024128c451c7a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8312407ad6b7f7242597c69a1381e66a3ea7647307276a8d17583b0e200ef906 +size 4868739 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7a5877a8cc7b9d0d4f7487e76c1b79da9a5df38d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c002363228ea456baa0db1dcc1859ed276386afb8e45f5bfdd2711b5e8ec3b +size 5699412 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..353edbab0e206c370dfda2ef7587e7dae60b697e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0e8fb31bed06a257ff2e3f229d5cb22826676ccdfeb4614e0582ff4fb39624 +size 6528660 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..00a59d43e5b68fa7f0f2b274ef12bb4ed57683a2 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ceeaeb90fc80c0e2634763414e17e4f73c672a6046309850b5549dc83a5de0 +size 2342873 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..34ef42848bda6084a0148edb64b548202f853ac7 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80aab9bd73f770b659950cb0bb273529b5a0be40446f4ca82fb2db946cc425f5 +size 3154983 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3a6813401b304286b3d050ac770a63f3d53d923 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae56b0e6f2317bced95f6ef6fd2a5319dd60f8d9f18d9fac12a09c3899dba1f +size 3959577 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c3de7d9a16b7bcc046f965e242f25082d69863b4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf23af9bf138dde28b2d7627ebf8667b6fefd8e5a193b438059e92d7f13fd63 +size 4763504 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..62a368d8e29ed9d1d1df39f812a0dd25c28cf7c5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c71037b3b0ea7aa3f7eada3b356ff08294b5b661abba4cb8acf82e0fbce892 +size 5568119 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d89a21a5b48a7adb4eee34b554ad11a10a601887 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141ef6c88cea43cc5ac70d8bf98b4a60fb84a9f5cd06c95c7c98071c6659d57a +size 6371237 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a31d2f1dad057af5ad583a3ab8c912d51758557a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7117ba22f586ae3e71a96aa6489971acc7b2a5a6ae16fe758fc51a1f69f70261 +size 250492 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e87ace0fe704ffed7f076ea5fd1df6ac589a48b3 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134c020046edf7a502379fd98682266ab21d94caae8f09be97ce0c5638965e7f +size 351378 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..37083d0fc9295eaa747f35f5ccbbe285e0010915 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cbb71463a4bbe41ff995ad7570bf7329c7ea4a62bdb1f012625584750d54da0 +size 449687 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6ad37dc37d10284973c6ef999a01ffd667792d6e --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ecac59234f709fac4dd6a1b76efa99ba9afed208d40e5c1e96a2f5b09dffba +size 553055 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3e914a6b59298252c7955de8ee2efc32f40c2a3f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530e15b41748aac024b4c9564c21cc13ecda1820568dd32e31379b59bb089a6e +size 651695 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c2b93e621ee0b455a81350c4ee0477f29a4316bd --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_GPT-3-style_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5546e25e2457498a77a80d866406fe72e8206addce4084ea0d50b978f8edda2a +size 747605 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d511aa36201e093bcce29e800c5b1f770caa7554 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381dfb7992e61072753f7568b0357a7d431f5f9c4e8461298672dd7f56f32a44 +size 293154 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..768d43cb08704b02163f0dd6504edde117f2d913 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7295f65226951d083f27dc918815aa09807e087c2185a267aeea621a875df0ea +size 415396 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2ebdc77526d2881975b5d9952375ecff4f9c06d8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d169a58c886cfbc4fd640ba9a37a57830b474e00b0cb4f25d54a52a4a9106b25 +size 535362 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4a636cd6710e406f0f9b0c4eec01f461560589e8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407431f7f12eced5dc171bdedaafcf9989878f8b8021c3b98bdcff397278c580 +size 660315 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a41435f2ee460c8082af68c024b22bc1b852cac6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f94b7c029e395f0d39a56f993b22f9dd953c45c69d69fd4916d244dfa697d8 +size 780545 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f0f156e80eda1a5f3e703af4e62d606bbad18de4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9050a3b0dcae1f90daa3816f361d888880d4435943b37c180df325dc8d71c36 +size 898052 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bd6cad9f3e9359d0378b345f46c9bfcd068ff1c6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4775de9984bb0be99f933f72c9a7a7c72fd5b6ac2788832dca6176a39207b95 +size 258482 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc490c8823e3c52cc9e18cab7366dad11d0e71a8 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99565433992ce474d10b85972eae3208827df0d126657782b1dfe3b9189ab69f +size 363081 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9be6c90c5501bfbe5735e3718747492429e51e07 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34db04bdbe388ef0ef1b3c225f7a3311f07b98e45378de3f4f211875af3d19cf +size 465284 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0528164aa03f64805820fff0d94fff572a8f203b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060e85cd11ea3715395a7c4ba3b2643d310b6b8b385595cbce6946fbb9d8db1a +size 572529 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8c59bf74b41ff5e4c3a449ab77552a142baa6eda --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761e966a2826f940642aea04baee1182334cb4a99752358b1fb564884883dca3 +size 675010 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..83c7903109a3e5c01a7079f04ac58ddc9efdcb59 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_does-it-follow-that_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2861ca5183d751213f95c44a506f19f4e2e25a9fa00773f29388e44db8316bdf +size 774795 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5c63b3878b86790c5bf2b8a938e47e32909c9013 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e41fe524e7e0af13e79157b03c0d46cd44df3257a50e8b0aacf37829da4ff0 +size 522586 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ea234eb5634b981992b6840646ad809f65da9557 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b079826f26f7dad06f169777437bbddb2da25c4ae884515f0e0e3c2b4d57e3e0 +size 367783 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f05befe90169ee6569214e0ff0fa7ca16f71a493 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c83b1424623b84beebceda091c77cbc72acc2a699e8aa623e812ee3e49ae51f +size 471927 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5e6b72e12fbe81c2bcc62ebaa6641688e8b92064 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05dd62606416b5ffb00dfe7c4aa25370cbd533ab4bd20172c6c458b9c01745d +size 581083 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..911cbc4489af255079cd082f0a9442be01b34e6b --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450ea22b57b40d8e95de3e300898e50dd8cb10f297ee880f87796ec077503a73 +size 685525 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..890371a321c5638b95bb37c26033f2201b15f93f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_guaranteed-true_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd5775d46981eeb0718b29f967c437c63fdd7a2dbceed617a856916f8973e2c +size 787230 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a3ad39c16f03b28880c86b47c9184639ef863293 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f972b39631a1b8e5050e46e60cb633a23164334303078db63d5f78fb434626e6 +size 262392 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..55ec3c5c24cf0de04b7ba0f5511c556e52f08321 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ea3c40ad0c6b54c3772ce993696c42437b8fa7bfcca30fc4e0e7e2f8caf85a +size 369740 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bdf560107282d4f9202a54358b56f2035c1e49f5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1213da2358e3c5cd69b5d24d6dfd3c710e00edeb31964e0a6386d3e4baca7c7 +size 474699 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..507214fb87daaba7e94eacae175e1f4163187112 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c8a4543633d287bd3ba70b191b97b1077c10eb25391eb50334dfd595afa572 +size 584676 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3506217995e0f7969e1c7f0b56e6c2387bb46623 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd2f10a663ad2383b53c034761b843579998e14d697c108d093e0f1f851695ae +size 689954 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..911e95c5754ea77002b75b469fff92e26ff6d9b4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_superglue_rte_should-assume_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b547b64f40804f2aa3ef5d357eebf725e9f5eef4f38fd93d94c71b3f8159e0d4 +size 792481 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d8a911d26a4c7a561592933fe5a079c5e3d20a56 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c79245c12e808adcf0f0fbbae5d40825f94dd8562922077632d14543263efb +size 1039200 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b23d1cc013d7d76ba2f8b5c64cd2a2884b989aa5 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44a2d6392cbbbe4bf9d35c04ca422281e1062dbba0cdd214d1e6291bdca12c5 +size 1300082 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..92b32d449c5a060d8e152cce9db0f3a11232183a --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51628364f199b14a8b8e8471797517213ddb9cde98b7b929e4a5739c67e24f0 +size 1561212 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a99ed8b631e0e141868e0f11638242afda084f1c --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de41c4d50bbb0c52c713d38576033db7ba01213362e8a65f8668304b9c2e3360 +size 1822653 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..24bf4072a205802ccad033e21c6e17fb1197db03 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24887f04891a5e3d7679e19fbb3229393cb2c53a740560b822fbd38e14dc90d8 +size 2082948 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e5d275803fcb0bbe68c8ee38daeeb9fc97792bf9 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_Replace_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7195b0e9ef407eba6210a80e1eefe45c00002a5d25dda74fe70ab7706a81b7 +size 2343672 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..46331a0f08cbb922420315e1697af867212e4d49 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536ca17b7abd74231bdc00a9ff4b4b81c0c6ebe48671c91fd169939dda15085d +size 948112 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..666a6e9f4385ddf70b6f0d84cbc102b79af98911 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd816b396eb928abbba32e7ad79cf333b6c6705fd8faa2269913c2bfa8abe3ca +size 1180416 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..826e25da1cfeec09a27b6aab74376658629ee622 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41727c7bf34e5555e0679d4a50139ede71fac9837e7e9cc253fa0c117e4a5b2 +size 1413083 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c2d49503ebce726f5f9bbf0d63ac2fe9f71510ae --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d983f74649c2116dfc2679b269ccfcaee031c6fe688ae4899aa39b5f2a944fab +size 1646070 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..43b930ca02adac92ce44184fa5161e2febd301af --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34640ab0f75ea131c1901f40e7bc69f12ce5ade330330ff418af9b0a0ef28150 +size 3756180 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..906d8d9fe81090766f71309be76aca9ae78f5547 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_True-or-False_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cf5cf3296a5bc8932327fadb4671164511cabddd278ecaa152894d4cc05505 +size 4220480 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..533448fa9b9bab8a9d1aa27df8b85d20bcb8021f --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5acf30bd323ad47d66550bfbd112b27273e8da9e08e5a597fafa5bfaf38efb1 +size 1010066 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2a9526d787794181c0e56fd102a707abdcc76369 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a9c2402e9ee30426a07d3f72cf90b69f8ece1432d9cdeb77417a8e8e5a68732 +size 1243093 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..bc574da37573df1a096b3190a59d39a1c6aefadc --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705e4b0c344971db022986a90e41ca8ba7419b1c8af418671e9c0e55bee57591 +size 1476398 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2b3f54e44f349d225bb7e7643e807ade4caca5a6 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8878a04c2d612880706fc1ce62f20ce78468a9146326f49275b516547fc7c069 +size 1709994 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b3f6afa2089ceddbeb99daa82687e2872a2ed512 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c987bd7bfee27e188fa09d05a0f641673a84d3830003190d255fdd024d8821 +size 3884778 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b407d92b8dbd819c3476b1d55afc3bee257f2767 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_does-underscore-refer-to_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dd72eba51cea4510073916d037e57cc7a92aa88b57e0e37117b96cb7c0c61e +size 2175236 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3b82618b9e0c232a7e9bd78d966742f2249a48e1 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faede6ed1d677395136d9cee531d68344eb72d2546e82628f63d6a54cb3955e9 +size 969344 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..feb80a71a8ab392bd5cd28e576edca6f2d713aba --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2024f64600888261a194049d4edb5fc9ba1b98a39e76863fb8a186e58b98beb +size 1204979 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a35e10015bd9c192ec2f7db53cdac60bf4449e73 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7fc84e8984df67d74b5dde5b11487d80583ddcff13c07d1765e7de431307bf +size 1440783 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5588320ed45b11a9fcc24f0a21cdcceb71f3ae62 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e263ab780c3ba4a27f209d7d48656d0bb6e34667dcf0f04bc7bdd61b718ec7 +size 1676899 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..168c9c837a14c3987993dce162459ebdca5ac0f4 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfad3c4c4944267f3201d278aca03f1514e958905f95f48262be74a0c32f754f +size 1911878 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3f7b1f6d574ec5106a466a6f13ab7f4db0494714 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_stand-for_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a2a23e43e614db8044ee6fa4567f0accba5fdb35c7bb0ad6a931cb1f0efe252 +size 2147272 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_0.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..15e9a76f89a32b10de50031c280e31d5fe67ca96 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505f3ce315d9ee652bfbda30c424063f23b4abe1d80afb978c90c2c95db3d72d +size 1016552 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_1.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..889d24c0d2dd3080ad370abf0b3bc9394f1b3c56 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0affb89628e6c2d79ef5302c75b915b6c02bf147af426e6082e86bced9f4576 +size 1257057 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_2.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..38ebada572f0aac4e6c92e8d40357d206c05235d --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66c9e36c56917208c423e029eae1e843cfa154a96bb5017615a09044dfe4ea48 +size 1497932 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_3.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4ccd7b150cf5a137f154dfb1500b0a702be3c417 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508b6f1ffefc6c273fc45a2031776ed04a8d949265619e71452c88ff4d7b64bd +size 1739120 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_4.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b7f245074fc0ae6044a029f4381558d2ceb02953 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad97711b1d642f8d3ab22b48ead42b9f48b689d564924d740941827a0e8e9c16 +size 1979136 diff --git a/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_5.jsonl b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5dd98916aab410acca8cd2ba2485de352e966621 --- /dev/null +++ b/146m14b14b/eval/examples.lm1-146m-14b_winogrande_underscore-refer-to_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c05139f50f35452ee34ee300b1381523f5af6e05c054c7e16a01172d3bdb4b +size 2219583 diff --git a/146m14b14b/eval/merged.csv b/146m14b14b/eval/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..455a59fa7b4f60883902085db28f70257957f6a7 --- /dev/null +++ b/146m14b14b/eval/merged.csv @@ -0,0 +1,587 @@ +dataset,fewshots,prompt,metric,value +anli_r1,0,GPT-3 style,acc,0.336 +anli_r1,0,MNLI crowdsource,acc,0.334 +anli_r1,0,can we infer,acc,0.34 +anli_r1,0,guaranteed/possible/impossible,acc,0.331 +anli_r1,0,justified in saying,acc,0.341 +anli_r1,0,median,accuracy,0.336 +anli_r1,1,GPT-3 style,acc,0.341 +anli_r1,1,MNLI crowdsource,acc,0.333 +anli_r1,1,can we infer,acc,0.33 +anli_r1,1,guaranteed/possible/impossible,acc,0.333 +anli_r1,1,justified in saying,acc,0.332 +anli_r1,1,median,accuracy,0.333 +anli_r1,2,GPT-3 style,acc,0.363 +anli_r1,2,MNLI crowdsource,acc,0.359 +anli_r1,2,can we infer,acc,0.351 +anli_r1,2,guaranteed/possible/impossible,acc,0.347 +anli_r1,2,justified in saying,acc,0.357 +anli_r1,2,median,accuracy,0.357 +anli_r1,3,GPT-3 style,acc,0.357 +anli_r1,3,MNLI crowdsource,acc,0.349 +anli_r1,3,can we infer,acc,0.363 +anli_r1,3,guaranteed/possible/impossible,acc,0.347 +anli_r1,3,justified in saying,acc,0.351 +anli_r1,3,median,accuracy,0.351 +anli_r1,4,GPT-3 style,acc,0.363 +anli_r1,4,MNLI crowdsource,acc,0.342 +anli_r1,4,can we infer,acc,0.363 +anli_r1,4,guaranteed/possible/impossible,acc,0.344 +anli_r1,4,justified in saying,acc,0.362 +anli_r1,4,median,accuracy,0.362 +anli_r1,5,GPT-3 style,acc,0.351 +anli_r1,5,MNLI crowdsource,acc,0.34 +anli_r1,5,can we infer,acc,0.355 +anli_r1,5,guaranteed/possible/impossible,acc,0.335 +anli_r1,5,justified in saying,acc,0.35 +anli_r1,5,median,accuracy,0.35 +anli_r1,5,average,multiple,0.3481666666666667 +anli_r2,0,GPT-3 style,acc,0.342 +anli_r2,0,MNLI crowdsource,acc,0.334 +anli_r2,0,can we infer,acc,0.336 +anli_r2,0,guaranteed/possible/impossible,acc,0.329 +anli_r2,0,justified in saying,acc,0.335 +anli_r2,0,median,accuracy,0.335 +anli_r2,1,GPT-3 style,acc,0.318 +anli_r2,1,MNLI crowdsource,acc,0.315 +anli_r2,1,can we infer,acc,0.331 +anli_r2,1,guaranteed/possible/impossible,acc,0.315 +anli_r2,1,justified in saying,acc,0.322 +anli_r2,1,median,accuracy,0.318 +anli_r2,2,GPT-3 style,acc,0.322 +anli_r2,2,MNLI crowdsource,acc,0.321 +anli_r2,2,can we infer,acc,0.327 +anli_r2,2,guaranteed/possible/impossible,acc,0.326 +anli_r2,2,justified in saying,acc,0.331 +anli_r2,2,median,accuracy,0.326 +anli_r2,3,GPT-3 style,acc,0.329 +anli_r2,3,MNLI crowdsource,acc,0.309 +anli_r2,3,can we infer,acc,0.332 +anli_r2,3,guaranteed/possible/impossible,acc,0.321 +anli_r2,3,justified in saying,acc,0.325 +anli_r2,3,median,accuracy,0.325 +anli_r2,4,GPT-3 style,acc,0.333 +anli_r2,4,MNLI crowdsource,acc,0.291 +anli_r2,4,can we infer,acc,0.321 +anli_r2,4,guaranteed/possible/impossible,acc,0.313 +anli_r2,4,justified in saying,acc,0.314 +anli_r2,4,median,accuracy,0.314 +anli_r2,5,GPT-3 style,acc,0.34 +anli_r2,5,MNLI crowdsource,acc,0.325 +anli_r2,5,can we infer,acc,0.331 +anli_r2,5,guaranteed/possible/impossible,acc,0.314 +anli_r2,5,justified in saying,acc,0.329 +anli_r2,5,median,accuracy,0.329 +anli_r2,5,average,multiple,0.3245 +anli_r3,0,GPT-3 style,acc,0.32916666666666666 +anli_r3,0,MNLI crowdsource,acc,0.335 +anli_r3,0,can we infer,acc,0.3475 +anli_r3,0,guaranteed/possible/impossible,acc,0.33166666666666667 +anli_r3,0,justified in saying,acc,0.3475 +anli_r3,0,median,accuracy,0.335 +anli_r3,1,GPT-3 style,acc,0.35 +anli_r3,1,MNLI crowdsource,acc,0.33666666666666667 +anli_r3,1,can we infer,acc,0.3525 +anli_r3,1,guaranteed/possible/impossible,acc,0.33666666666666667 +anli_r3,1,justified in saying,acc,0.3383333333333333 +anli_r3,1,median,accuracy,0.3383333333333333 +anli_r3,2,GPT-3 style,acc,0.33666666666666667 +anli_r3,2,MNLI crowdsource,acc,0.32666666666666666 +anli_r3,2,can we infer,acc,0.335 +anli_r3,2,guaranteed/possible/impossible,acc,0.32166666666666666 +anli_r3,2,justified in saying,acc,0.3258333333333333 +anli_r3,2,median,accuracy,0.32666666666666666 +anli_r3,3,GPT-3 style,acc,0.3516666666666667 +anli_r3,3,MNLI crowdsource,acc,0.33916666666666667 +anli_r3,3,can we infer,acc,0.345 +anli_r3,3,guaranteed/possible/impossible,acc,0.3416666666666667 +anli_r3,3,justified in saying,acc,0.335 +anli_r3,3,median,accuracy,0.3416666666666667 +anli_r3,4,GPT-3 style,acc,0.3466666666666667 +anli_r3,4,MNLI crowdsource,acc,0.3225 +anli_r3,4,can we infer,acc,0.32666666666666666 +anli_r3,4,guaranteed/possible/impossible,acc,0.33416666666666667 +anli_r3,4,justified in saying,acc,0.3258333333333333 +anli_r3,4,median,accuracy,0.32666666666666666 +anli_r3,5,GPT-3 style,acc,0.3325 +anli_r3,5,MNLI crowdsource,acc,0.3258333333333333 +anli_r3,5,can we infer,acc,0.3175 +anli_r3,5,guaranteed/possible/impossible,acc,0.31166666666666665 +anli_r3,5,justified in saying,acc,0.31333333333333335 +anli_r3,5,median,accuracy,0.3175 +anli_r3,5,average,multiple,0.3309722222222222 +arc_easy,0,heres_a_problem,acc,0.22013651877133106 +arc_easy,0,i_am_hesitating,acc,0.2098976109215017 +arc_easy,0,multiple_choice,acc,0.2235494880546075 +arc_easy,0,pick_the_most_correct_option,acc,0.2478956228956229 +arc_easy,0,qa_options,acc,0.20477815699658702 +arc_easy,0,median,accuracy,0.22013651877133106 +arc_easy,1,heres_a_problem,acc,0.2478956228956229 +arc_easy,1,i_am_hesitating,acc,0.2828282828282828 +arc_easy,1,multiple_choice,acc,0.2781986531986532 +arc_easy,1,pick_the_most_correct_option,acc,0.25 +arc_easy,1,qa_options,acc,0.2098976109215017 +arc_easy,1,median,accuracy,0.25 +arc_easy,2,heres_a_problem,acc,0.26136363636363635 +arc_easy,2,i_am_hesitating,acc,0.2150170648464164 +arc_easy,2,multiple_choice,acc,0.2760942760942761 +arc_easy,2,pick_the_most_correct_option,acc,0.26641414141414144 +arc_easy,2,qa_options,acc,0.21160409556313994 +arc_easy,2,median,accuracy,0.26136363636363635 +arc_easy,3,heres_a_problem,acc,0.2508532423208191 +arc_easy,3,i_am_hesitating,acc,0.2150170648464164 +arc_easy,3,multiple_choice,acc,0.2697811447811448 +arc_easy,3,pick_the_most_correct_option,acc,0.2380546075085324 +arc_easy,3,qa_options,acc,0.2090443686006826 +arc_easy,3,median,accuracy,0.2380546075085324 +arc_easy,4,heres_a_problem,acc,0.2593856655290102 +arc_easy,4,i_am_hesitating,acc,0.22525597269624573 +arc_easy,4,multiple_choice,acc,0.26262626262626265 +arc_easy,4,pick_the_most_correct_option,acc,0.24914675767918087 +arc_easy,4,qa_options,acc,0.22866894197952217 +arc_easy,4,median,accuracy,0.24914675767918087 +arc_easy,5,heres_a_problem,acc,0.2622053872053872 +arc_easy,5,i_am_hesitating,acc,0.27441077441077444 +arc_easy,5,multiple_choice,acc,0.2030716723549488 +arc_easy,5,pick_the_most_correct_option,acc,0.2440273037542662 +arc_easy,5,qa_options,acc,0.2760942760942761 +arc_easy,5,median,accuracy,0.2622053872053872 +arc_easy,5,average,multiple,0.24681781792134466 +boolq,0,GPT-3 Style,acc,0.5703333333333334 +boolq,0,after_reading,acc,0.6236666666666667 +boolq,0,exercise,acc,0.6233333333333333 +boolq,0,valid_binary,acc,0.61 +boolq,0,yes_no_question,acc,0.624 +boolq,0,median,accuracy,0.6233333333333333 +boolq,1,GPT-3 Style,acc,0.536 +boolq,1,after_reading,acc,0.575 +boolq,1,exercise,acc,0.5423333333333333 +boolq,1,valid_binary,acc,0.5586666666666666 +boolq,1,yes_no_question,acc,0.5476666666666666 +boolq,1,median,accuracy,0.5476666666666666 +boolq,2,GPT-3 Style,acc,0.5546666666666666 +boolq,2,after_reading,acc,0.593 +boolq,2,exercise,acc,0.5223333333333333 +boolq,2,valid_binary,acc,0.5856666666666667 +boolq,2,yes_no_question,acc,0.579 +boolq,2,median,accuracy,0.579 +boolq,3,GPT-3 Style,acc,0.5446666666666666 +boolq,3,after_reading,acc,0.5793333333333334 +boolq,3,exercise,acc,0.5453333333333333 +boolq,3,valid_binary,acc,0.5883333333333334 +boolq,3,yes_no_question,acc,0.5733333333333334 +boolq,3,median,accuracy,0.5733333333333334 +boolq,4,GPT-3 Style,acc,0.551 +boolq,4,after_reading,acc,0.5696666666666667 +boolq,4,exercise,acc,0.5423333333333333 +boolq,4,valid_binary,acc,0.581 +boolq,4,yes_no_question,acc,0.575 +boolq,4,median,accuracy,0.5696666666666667 +boolq,5,GPT-3 Style,acc,0.5473333333333333 +boolq,5,after_reading,acc,0.5663333333333334 +boolq,5,exercise,acc,0.543 +boolq,5,valid_binary,acc,0.582 +boolq,5,yes_no_question,acc,0.5706666666666667 +boolq,5,median,accuracy,0.5663333333333334 +boolq,5,average,multiple,0.5765555555555555 +cb,0,GPT-3 style,acc,0.4107142857142857 +cb,0,MNLI crowdsource,acc,0.4107142857142857 +cb,0,can we infer,acc,0.5714285714285714 +cb,0,guaranteed/possible/impossible,acc,0.14285714285714285 +cb,0,justified in saying,acc,0.6071428571428571 +cb,0,median,accuracy,0.4107142857142857 +cb,1,GPT-3 style,acc,0.39285714285714285 +cb,1,MNLI crowdsource,acc,0.39285714285714285 +cb,1,can we infer,acc,0.42857142857142855 +cb,1,guaranteed/possible/impossible,acc,0.39285714285714285 +cb,1,justified in saying,acc,0.44642857142857145 +cb,1,median,accuracy,0.39285714285714285 +cb,2,GPT-3 style,acc,0.44642857142857145 +cb,2,MNLI crowdsource,acc,0.4107142857142857 +cb,2,can we infer,acc,0.5 +cb,2,guaranteed/possible/impossible,acc,0.375 +cb,2,justified in saying,acc,0.48214285714285715 +cb,2,median,accuracy,0.44642857142857145 +cb,3,GPT-3 style,acc,0.39285714285714285 +cb,3,MNLI crowdsource,acc,0.39285714285714285 +cb,3,can we infer,acc,0.5 +cb,3,guaranteed/possible/impossible,acc,0.35714285714285715 +cb,3,justified in saying,acc,0.44642857142857145 +cb,3,median,accuracy,0.39285714285714285 +cb,4,GPT-3 style,acc,0.375 +cb,4,MNLI crowdsource,acc,0.4107142857142857 +cb,4,can we infer,acc,0.5 +cb,4,guaranteed/possible/impossible,acc,0.39285714285714285 +cb,4,justified in saying,acc,0.5 +cb,4,median,accuracy,0.4107142857142857 +cb,5,GPT-3 style,acc,0.39285714285714285 +cb,5,MNLI crowdsource,acc,0.35714285714285715 +cb,5,can we infer,acc,0.44642857142857145 +cb,5,guaranteed/possible/impossible,acc,0.375 +cb,5,justified in saying,acc,0.44642857142857145 +cb,5,median,accuracy,0.39285714285714285 +cb,5,average,multiple,0.40773809523809523 +copa,0,best_option,acc,0.58 +copa,0,cause_effect,acc,0.57 +copa,0,choose,acc,0.6 +copa,0,i_am_hesitating,acc,0.61 +copa,0,plausible_alternatives,acc,0.59 +copa,0,median,accuracy,0.59 +copa,1,best_option,acc,0.57 +copa,1,cause_effect,acc,0.57 +copa,1,choose,acc,0.54 +copa,1,i_am_hesitating,acc,0.55 +copa,1,plausible_alternatives,acc,0.53 +copa,1,median,accuracy,0.55 +copa,2,best_option,acc,0.56 +copa,2,cause_effect,acc,0.53 +copa,2,choose,acc,0.53 +copa,2,i_am_hesitating,acc,0.52 +copa,2,plausible_alternatives,acc,0.49 +copa,2,median,accuracy,0.53 +copa,3,best_option,acc,0.63 +copa,3,cause_effect,acc,0.53 +copa,3,choose,acc,0.48 +copa,3,i_am_hesitating,acc,0.52 +copa,3,plausible_alternatives,acc,0.49 +copa,3,median,accuracy,0.52 +copa,4,best_option,acc,0.59 +copa,4,cause_effect,acc,0.52 +copa,4,choose,acc,0.5 +copa,4,i_am_hesitating,acc,0.5 +copa,4,plausible_alternatives,acc,0.48 +copa,4,median,accuracy,0.5 +copa,5,best_option,acc,0.58 +copa,5,cause_effect,acc,0.51 +copa,5,choose,acc,0.48 +copa,5,i_am_hesitating,acc,0.49 +copa,5,plausible_alternatives,acc,0.46 +copa,5,median,accuracy,0.49 +copa,5,average,multiple,0.53 +e2e_nlg_cleaned,0,coherent_text,rouge2_fmeasure,0.0995456556042845 +e2e_nlg_cleaned,0,create_text_for_me,rouge2_fmeasure,0.05566703607474666 +e2e_nlg_cleaned,0,generate_gramatically_correct_text,rouge2_fmeasure,0.10146880175157826 +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.0022741562354756397 +e2e_nlg_cleaned,0,text,rouge2_fmeasure,0.13831925575429133 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.0995456556042845 +e2e_nlg_cleaned,1,coherent_text,rouge2_fmeasure,0.09798530798331949 +e2e_nlg_cleaned,1,create_text_for_me,rouge2_fmeasure,0.08973585628096593 +e2e_nlg_cleaned,1,generate_gramatically_correct_text,rouge2_fmeasure,0.11107876311211701 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.12715929154890004 +e2e_nlg_cleaned,1,text,rouge2_fmeasure,0.11624053272466801 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.11107876311211701 +e2e_nlg_cleaned,2,coherent_text,rouge2_fmeasure,0.12398725975487611 +e2e_nlg_cleaned,2,create_text_for_me,rouge2_fmeasure,0.11659264308512794 +e2e_nlg_cleaned,2,generate_gramatically_correct_text,rouge2_fmeasure,0.11330107614177073 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.14362551846595048 +e2e_nlg_cleaned,2,text,rouge2_fmeasure,0.12917141922678907 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.12398725975487611 +e2e_nlg_cleaned,3,coherent_text,rouge2_fmeasure,0.1372251409323741 +e2e_nlg_cleaned,3,create_text_for_me,rouge2_fmeasure,0.13197675444985899 +e2e_nlg_cleaned,3,generate_gramatically_correct_text,rouge2_fmeasure,0.11772014214137563 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.155349432200804 +e2e_nlg_cleaned,3,text,rouge2_fmeasure,0.13699349439736094 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.13699349439736094 +e2e_nlg_cleaned,4,coherent_text,rouge2_fmeasure,0.14162675325545648 +e2e_nlg_cleaned,4,create_text_for_me,rouge2_fmeasure,0.13929100917815865 +e2e_nlg_cleaned,4,generate_gramatically_correct_text,rouge2_fmeasure,0.1204704109621881 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.15872608958193554 +e2e_nlg_cleaned,4,text,rouge2_fmeasure,0.1374084834705204 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.13929100917815865 +e2e_nlg_cleaned,5,coherent_text,rouge2_fmeasure,0.14450895819387546 +e2e_nlg_cleaned,5,create_text_for_me,rouge2_fmeasure,0.14278771616071478 +e2e_nlg_cleaned,5,generate_gramatically_correct_text,rouge2_fmeasure,0.12548313742099432 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.16117969333074014 +e2e_nlg_cleaned,5,text,rouge2_fmeasure,0.13963971276485945 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.14278771616071478 +e2e_nlg_cleaned,5,average,multiple,0.12561398303458532 +gem_xsum,0,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.01678428011344468 +gem_xsum,0,DOC_tldr,rouge2_fmeasure,0.02280597280652988 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.02588589928345875 +gem_xsum,0,summarize_DOC,rouge2_fmeasure,0.02239979264930804 +gem_xsum,0,summarize_this_DOC_summary,rouge2_fmeasure,0.02364976404549372 +gem_xsum,0,median,rouge2_fmeasure,0.02280597280652988 +gem_xsum,1,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.010073796253903308 +gem_xsum,1,DOC_tldr,rouge2_fmeasure,0.018246524077524105 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.01908194699059756 +gem_xsum,1,summarize_DOC,rouge2_fmeasure,0.02178998612851725 +gem_xsum,1,summarize_this_DOC_summary,rouge2_fmeasure,0.023239358291017316 +gem_xsum,1,median,rouge2_fmeasure,0.01908194699059756 +gem_xsum,2,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.011705506769985538 +gem_xsum,2,DOC_tldr,rouge2_fmeasure,0.020808658216032574 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.01990450612056915 +gem_xsum,2,summarize_DOC,rouge2_fmeasure,0.020699762711342613 +gem_xsum,2,summarize_this_DOC_summary,rouge2_fmeasure,0.02358829661609317 +gem_xsum,2,median,rouge2_fmeasure,0.020699762711342613 +gem_xsum,3,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.012326768637832572 +gem_xsum,3,DOC_tldr,rouge2_fmeasure,0.02056912761205674 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.02153589403065349 +gem_xsum,3,summarize_DOC,rouge2_fmeasure,0.02079595560428378 +gem_xsum,3,summarize_this_DOC_summary,rouge2_fmeasure,0.02410030299641933 +gem_xsum,3,median,rouge2_fmeasure,0.02079595560428378 +gem_xsum,4,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.0045358862257299385 +gem_xsum,4,DOC_tldr,rouge2_fmeasure,0.005605133916804629 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005510795996651232 +gem_xsum,4,summarize_DOC,rouge2_fmeasure,0.006225565560992794 +gem_xsum,4,summarize_this_DOC_summary,rouge2_fmeasure,0.006031590914671055 +gem_xsum,4,median,rouge2_fmeasure,0.005605133916804629 +gem_xsum,5,DOC_boils_down_to_simple_idea_that,rouge2_fmeasure,0.0 +gem_xsum,5,DOC_tldr,rouge2_fmeasure,0.0003713888095226837 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0007075672984960149 +gem_xsum,5,summarize_DOC,rouge2_fmeasure,0.0004607621449381724 +gem_xsum,5,summarize_this_DOC_summary,rouge2_fmeasure,0.0001905850962454736 +gem_xsum,5,median,rouge2_fmeasure,0.0003713888095226837 +gem_xsum,5,average,multiple,0.014893360139846858 +piqa,0,Correct the solution,rouge2_fmeasure,0.06739212009258444 +piqa,0,choose the most appropriate solution,acc,0.49891186071817195 +piqa,0,no prompt needed,rouge2_fmeasure,0.0038833183768095616 +piqa,0,pick_correct_choice_index,acc,0.49510337323177367 +piqa,0,what_is_the_correct_ending,acc,0.5136017410228509 +piqa,0,median,accuracy,0.49891186071817195 +piqa,1,Correct the solution,rouge2_fmeasure,0.11307708650003363 +piqa,1,choose the most appropriate solution,acc,0.5021762785636561 +piqa,1,no prompt needed,rouge2_fmeasure,0.007982487413659915 +piqa,1,pick_correct_choice_index,acc,0.5048966267682263 +piqa,1,what_is_the_correct_ending,acc,0.5206746463547334 +piqa,1,median,accuracy,0.5048966267682263 +piqa,2,Correct the solution,rouge2_fmeasure,0.22646917848202447 +piqa,2,choose the most appropriate solution,acc,0.5119695321001088 +piqa,2,no prompt needed,rouge2_fmeasure,0.0063412101839208194 +piqa,2,pick_correct_choice_index,acc,0.5027203482045702 +piqa,2,what_is_the_correct_ending,acc,0.5201305767138193 +piqa,2,median,accuracy,0.5119695321001088 +piqa,3,Correct the solution,rouge2_fmeasure,0.2818434589400104 +piqa,3,choose the most appropriate solution,acc,0.5032644178454843 +piqa,3,no prompt needed,rouge2_fmeasure,0.006082833337651904 +piqa,3,pick_correct_choice_index,acc,0.5108813928182807 +piqa,3,what_is_the_correct_ending,acc,0.5223068552774756 +piqa,3,median,accuracy,0.5108813928182807 +piqa,4,Correct the solution,rouge2_fmeasure,0.32028893653738877 +piqa,4,choose the most appropriate solution,acc,0.5038084874863983 +piqa,4,no prompt needed,rouge2_fmeasure,0.0070696546789010785 +piqa,4,pick_correct_choice_index,acc,0.5087051142546246 +piqa,4,what_is_the_correct_ending,acc,0.529923830250272 +piqa,4,median,accuracy,0.5087051142546246 +piqa,5,Correct the solution,rouge2_fmeasure,0.3436917448426498 +piqa,5,choose the most appropriate solution,acc,0.5016322089227421 +piqa,5,no prompt needed,rouge2_fmeasure,0.006029826803091018 +piqa,5,pick_correct_choice_index,acc,0.5032644178454843 +piqa,5,what_is_the_correct_ending,acc,0.5272034820457019 +piqa,5,median,accuracy,0.5032644178454843 +piqa,5,average,multiple,0.5064381574174828 +sciq,0,Direct Question,acc,0.703 +sciq,0,Direct Question (Closed Book),acc,0.397 +sciq,0,Multiple Choice,acc,0.486 +sciq,0,Multiple Choice (Closed Book),acc,0.373 +sciq,0,Multiple Choice Question First,acc,0.508 +sciq,0,median,accuracy,0.486 +sciq,1,Direct Question,acc,0.723 +sciq,1,Direct Question (Closed Book),acc,0.415 +sciq,1,Multiple Choice,acc,0.427 +sciq,1,Multiple Choice (Closed Book),acc,0.376 +sciq,1,Multiple Choice Question First,acc,0.411 +sciq,1,median,accuracy,0.415 +sciq,2,Direct Question,acc,0.721 +sciq,2,Direct Question (Closed Book),acc,0.405 +sciq,2,Multiple Choice,acc,0.401 +sciq,2,Multiple Choice (Closed Book),acc,0.36 +sciq,2,Multiple Choice Question First,acc,0.366 +sciq,2,median,accuracy,0.401 +sciq,3,Direct Question,acc,0.719 +sciq,3,Direct Question (Closed Book),acc,0.396 +sciq,3,Multiple Choice,acc,0.385 +sciq,3,Multiple Choice (Closed Book),acc,0.325 +sciq,3,Multiple Choice Question First,acc,0.368 +sciq,3,median,accuracy,0.385 +sciq,4,Direct Question,acc,0.723 +sciq,4,Direct Question (Closed Book),acc,0.383 +sciq,4,Multiple Choice,acc,0.376 +sciq,4,Multiple Choice (Closed Book),acc,0.341 +sciq,4,Multiple Choice Question First,acc,0.37 +sciq,4,median,accuracy,0.376 +sciq,5,Direct Question,acc,0.718 +sciq,5,Direct Question (Closed Book),acc,0.388 +sciq,5,Multiple Choice,acc,0.355 +sciq,5,Multiple Choice (Closed Book),acc,0.332 +sciq,5,Multiple Choice Question First,acc,0.361 +sciq,5,median,accuracy,0.361 +sciq,5,average,multiple,0.404 +story_cloze_2016,0,Answer Given options,acc,0.5018706574024586 +story_cloze_2016,0,Choose Story Ending,acc,0.5093532870122929 +story_cloze_2016,0,Novel Correct Ending,acc,0.504008551576697 +story_cloze_2016,0,Story Continuation and Options,acc,0.5109567076429716 +story_cloze_2016,0,median,accuracy,0.506680919294495 +story_cloze_2016,1,Answer Given options,acc,0.4922501336183859 +story_cloze_2016,1,Choose Story Ending,acc,0.49706039551042225 +story_cloze_2016,1,Novel Correct Ending,acc,0.4906467129877071 +story_cloze_2016,1,Story Continuation and Options,acc,0.4917156600748263 +story_cloze_2016,1,median,accuracy,0.4919828968466061 +story_cloze_2016,2,Answer Given options,acc,0.4794227685729556 +story_cloze_2016,2,Choose Story Ending,acc,0.4863709246392304 +story_cloze_2016,2,Novel Correct Ending,acc,0.4719401389631213 +story_cloze_2016,2,Story Continuation and Options,acc,0.48743987172634956 +story_cloze_2016,2,median,accuracy,0.482896846606093 +story_cloze_2016,3,Answer Given options,acc,0.4836985569214324 +story_cloze_2016,3,Choose Story Ending,acc,0.4922501336183859 +story_cloze_2016,3,Novel Correct Ending,acc,0.4820951362907536 +story_cloze_2016,3,Story Continuation and Options,acc,0.481560662747194 +story_cloze_2016,3,median,accuracy,0.482896846606093 +story_cloze_2016,4,Answer Given options,acc,0.48476750400855156 +story_cloze_2016,4,Choose Story Ending,acc,0.4778193479422769 +story_cloze_2016,4,Novel Correct Ending,acc,0.4820951362907536 +story_cloze_2016,4,Story Continuation and Options,acc,0.47888829502939606 +story_cloze_2016,4,median,accuracy,0.48049171566007487 +story_cloze_2016,5,Answer Given options,acc,0.47888829502939606 +story_cloze_2016,5,Choose Story Ending,acc,0.4735435595938001 +story_cloze_2016,5,Novel Correct Ending,acc,0.4730090860502405 +story_cloze_2016,5,Story Continuation and Options,acc,0.47033671833244256 +story_cloze_2016,5,median,accuracy,0.4732763228220203 +story_cloze_2016,5,average,multiple,0.4863709246392304 +superglue_rte,0,GPT-3 style,acc,0.5379061371841155 +superglue_rte,0,MNLI crowdsource,acc,0.5270758122743683 +superglue_rte,0,does it follow that,acc,0.516245487364621 +superglue_rte,0,guaranteed true,acc,0.5342960288808665 +superglue_rte,0,should assume,acc,0.5306859205776173 +superglue_rte,0,median,accuracy,0.5306859205776173 +superglue_rte,1,GPT-3 style,acc,0.48375451263537905 +superglue_rte,1,MNLI crowdsource,acc,0.48375451263537905 +superglue_rte,1,does it follow that,acc,0.4981949458483754 +superglue_rte,1,guaranteed true,acc,0.48014440433212996 +superglue_rte,1,should assume,acc,0.4729241877256318 +superglue_rte,1,median,accuracy,0.48375451263537905 +superglue_rte,2,GPT-3 style,acc,0.5523465703971119 +superglue_rte,2,MNLI crowdsource,acc,0.5018050541516246 +superglue_rte,2,does it follow that,acc,0.5054151624548736 +superglue_rte,2,guaranteed true,acc,0.49458483754512633 +superglue_rte,2,should assume,acc,0.49458483754512633 +superglue_rte,2,median,accuracy,0.5018050541516246 +superglue_rte,3,GPT-3 style,acc,0.5054151624548736 +superglue_rte,3,MNLI crowdsource,acc,0.4981949458483754 +superglue_rte,3,does it follow that,acc,0.48736462093862815 +superglue_rte,3,guaranteed true,acc,0.4981949458483754 +superglue_rte,3,should assume,acc,0.48375451263537905 +superglue_rte,3,median,accuracy,0.4981949458483754 +superglue_rte,4,GPT-3 style,acc,0.5270758122743683 +superglue_rte,4,MNLI crowdsource,acc,0.49458483754512633 +superglue_rte,4,does it follow that,acc,0.5090252707581228 +superglue_rte,4,guaranteed true,acc,0.47653429602888087 +superglue_rte,4,should assume,acc,0.4729241877256318 +superglue_rte,4,median,accuracy,0.49458483754512633 +superglue_rte,5,GPT-3 style,acc,0.48375451263537905 +superglue_rte,5,MNLI crowdsource,acc,0.49458483754512633 +superglue_rte,5,does it follow that,acc,0.48014440433212996 +superglue_rte,5,guaranteed true,acc,0.49458483754512633 +superglue_rte,5,should assume,acc,0.48736462093862815 +superglue_rte,5,median,accuracy,0.48736462093862815 +superglue_rte,5,average,multiple,0.4993983152827918 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.04185850357277026 +web_nlg_en,0,explicit-graph-description2,rouge2_fmeasure,0.0010295704236990917 +web_nlg_en,0,implicit-graph-description,rouge2_fmeasure,0.0043358346468287505 +web_nlg_en,0,non-explicit-description,rouge2_fmeasure,0.00644935707690507 +web_nlg_en,0,very-explicit-description,rouge2_fmeasure,0.0645288094675153 +web_nlg_en,0,median,rouge2_fmeasure,0.00644935707690507 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03690045692783099 +web_nlg_en,1,explicit-graph-description2,rouge2_fmeasure,0.015048493432535332 +web_nlg_en,1,implicit-graph-description,rouge2_fmeasure,0.015147654240964752 +web_nlg_en,1,non-explicit-description,rouge2_fmeasure,0.023945131921722818 +web_nlg_en,1,very-explicit-description,rouge2_fmeasure,0.056216955504673816 +web_nlg_en,1,median,rouge2_fmeasure,0.023945131921722818 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.03847532772409176 +web_nlg_en,2,explicit-graph-description2,rouge2_fmeasure,0.04229150796312311 +web_nlg_en,2,implicit-graph-description,rouge2_fmeasure,0.05131564722525416 +web_nlg_en,2,non-explicit-description,rouge2_fmeasure,0.050683857012723176 +web_nlg_en,2,very-explicit-description,rouge2_fmeasure,0.07054805798392726 +web_nlg_en,2,median,rouge2_fmeasure,0.050683857012723176 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.036941223370484465 +web_nlg_en,3,explicit-graph-description2,rouge2_fmeasure,0.05870693940495815 +web_nlg_en,3,implicit-graph-description,rouge2_fmeasure,0.06387420889427227 +web_nlg_en,3,non-explicit-description,rouge2_fmeasure,0.06763126769213473 +web_nlg_en,3,very-explicit-description,rouge2_fmeasure,0.07655162918992574 +web_nlg_en,3,median,rouge2_fmeasure,0.06387420889427227 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.03752454514172853 +web_nlg_en,4,explicit-graph-description2,rouge2_fmeasure,0.06941913517884618 +web_nlg_en,4,implicit-graph-description,rouge2_fmeasure,0.07429307318653183 +web_nlg_en,4,non-explicit-description,rouge2_fmeasure,0.07863230130745369 +web_nlg_en,4,very-explicit-description,rouge2_fmeasure,0.08291524084048744 +web_nlg_en,4,median,rouge2_fmeasure,0.07429307318653183 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03903350582230713 +web_nlg_en,5,explicit-graph-description2,rouge2_fmeasure,0.07628545628688663 +web_nlg_en,5,implicit-graph-description,rouge2_fmeasure,0.08476444821349621 +web_nlg_en,5,non-explicit-description,rouge2_fmeasure,0.08355597806644839 +web_nlg_en,5,very-explicit-description,rouge2_fmeasure,0.08265916020277501 +web_nlg_en,5,median,rouge2_fmeasure,0.08265916020277501 +web_nlg_en,5,average,multiple,0.0503174647158217 +wiki_lingua_en,0,article_summary_en,rouge2_fmeasure,0.056425445419599346 +wiki_lingua_en,0,rephrase_en,rouge2_fmeasure,0.011588030659270141 +wiki_lingua_en,0,summarize_above_en,rouge2_fmeasure,0.005239039550783603 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.014011218070534905 +wiki_lingua_en,0,write_abstract_en,rouge2_fmeasure,0.016689036256012268 +wiki_lingua_en,0,median,rouge2_fmeasure,0.014011218070534905 +wiki_lingua_en,1,article_summary_en,rouge2_fmeasure,0.03048470823212383 +wiki_lingua_en,1,rephrase_en,rouge2_fmeasure,0.003914558098208722 +wiki_lingua_en,1,summarize_above_en,rouge2_fmeasure,0.0037395950495648657 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.007230676056378635 +wiki_lingua_en,1,write_abstract_en,rouge2_fmeasure,0.003943433623482069 +wiki_lingua_en,1,median,rouge2_fmeasure,0.003943433623482069 +wiki_lingua_en,2,article_summary_en,rouge2_fmeasure,0.028283684834043706 +wiki_lingua_en,2,rephrase_en,rouge2_fmeasure,0.005443826668587504 +wiki_lingua_en,2,summarize_above_en,rouge2_fmeasure,0.006572282621218325 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.014134158871786717 +wiki_lingua_en,2,write_abstract_en,rouge2_fmeasure,0.006134109611250491 +wiki_lingua_en,2,median,rouge2_fmeasure,0.006572282621218325 +wiki_lingua_en,3,article_summary_en,rouge2_fmeasure,0.021957452841427055 +wiki_lingua_en,3,rephrase_en,rouge2_fmeasure,0.0066746059699027156 +wiki_lingua_en,3,summarize_above_en,rouge2_fmeasure,0.00773667698720356 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.015114213677740143 +wiki_lingua_en,3,write_abstract_en,rouge2_fmeasure,0.006138307630609741 +wiki_lingua_en,3,median,rouge2_fmeasure,0.00773667698720356 +wiki_lingua_en,4,article_summary_en,rouge2_fmeasure,0.008202499844200142 +wiki_lingua_en,4,rephrase_en,rouge2_fmeasure,0.003519391416781204 +wiki_lingua_en,4,summarize_above_en,rouge2_fmeasure,0.00317018932471084 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.006349834491065967 +wiki_lingua_en,4,write_abstract_en,rouge2_fmeasure,0.00167678535423955 +wiki_lingua_en,4,median,rouge2_fmeasure,0.003519391416781204 +wiki_lingua_en,5,article_summary_en,rouge2_fmeasure,0.001354763281978201 +wiki_lingua_en,5,rephrase_en,rouge2_fmeasure,0.0005748508715984116 +wiki_lingua_en,5,summarize_above_en,rouge2_fmeasure,0.0007838553827958208 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0011831626630347113 +wiki_lingua_en,5,write_abstract_en,rouge2_fmeasure,0.00013934402915387212 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0007838553827958208 +wiki_lingua_en,5,average,multiple,0.00609447635033598 +winogrande,0,Replace,acc,0.505130228887135 +winogrande,0,True or False,acc,0.4964483030781373 +winogrande,0,does underscore refer to,acc,0.5067087608524072 +winogrande,0,stand for,acc,0.5256511444356748 +winogrande,0,underscore refer to,acc,0.5130228887134964 +winogrande,0,median,accuracy,0.5067087608524072 +winogrande,1,Replace,acc,0.5059194948697711 +winogrande,1,True or False,acc,0.4925019731649566 +winogrande,1,does underscore refer to,acc,0.5169692186266772 +winogrande,1,stand for,acc,0.5146014206787688 +winogrande,1,underscore refer to,acc,0.5027624309392266 +winogrande,1,median,accuracy,0.5059194948697711 +winogrande,2,Replace,acc,0.4980268350434096 +winogrande,2,True or False,acc,0.5027624309392266 +winogrande,2,does underscore refer to,acc,0.489344909234412 +winogrande,2,stand for,acc,0.5082872928176796 +winogrande,2,underscore refer to,acc,0.5011838989739542 +winogrande,2,median,accuracy,0.5011838989739542 +winogrande,3,Replace,acc,0.5090765588003157 +winogrande,3,True or False,acc,0.516179952644041 +winogrande,3,does underscore refer to,acc,0.500394632991318 +winogrande,3,stand for,acc,0.49329123914759276 +winogrande,3,underscore refer to,acc,0.500394632991318 +winogrande,3,median,accuracy,0.500394632991318 +winogrande,4,Replace,acc,0.5011838989739542 +winogrande,4,True or False,acc,0.500394632991318 +winogrande,4,does underscore refer to,acc,0.48855564325177586 +winogrande,4,stand for,acc,0.5035516969218626 +winogrande,4,underscore refer to,acc,0.49013417521704816 +winogrande,4,median,accuracy,0.500394632991318 +winogrande,5,Replace,acc,0.5114443567482242 +winogrande,5,True or False,acc,0.5011838989739542 +winogrande,5,does underscore refer to,acc,0.4964483030781373 +winogrande,5,stand for,acc,0.5114443567482242 +winogrande,5,underscore refer to,acc,0.48697711128650356 +winogrande,5,median,accuracy,0.5011838989739542 +winogrande,5,average,multiple,0.5026308866087872 diff --git a/146m14b14b/eval/merged.json b/146m14b14b/eval/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..df85dae2a3b0c196be304c40c17bfe6f753a10eb --- /dev/null +++ b/146m14b14b/eval/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.2304122240550486, "bleu_stderr": 0.02282542594810443, "rouge1_fmeasure": 0.088654136660333, "rouge1_fmeasure_stderr": 0.0018667826313676483, "rouge1_precision": 0.060241646379365375, "rouge1_precision_stderr": 0.0018281076733804075, "rouge1_recall": 0.24592443008643936, "rouge1_recall_stderr": 0.00451257736641835, "rouge2_fmeasure": 0.04185850357277026, "rouge2_fmeasure_stderr": 0.0011315772112214053, "rouge2_precision": 0.028611694237188352, "rouge2_precision_stderr": 0.001256141851839684, "rouge2_recall": 0.1211577510207888, "rouge2_recall_stderr": 0.0030274510689492543, "rougeL_fmeasure": 0.08728570338341435, "rougeL_fmeasure_stderr": 0.001811977941125175, "rougeL_precision": 0.059215633929813966, "rougeL_precision_stderr": 0.0017815112040087317, "rougeL_recall": 0.2428343490294157, "rougeL_recall_stderr": 0.004471374721999319, "rougeLsum_fmeasure": 0.08440847938326362, "rougeLsum_fmeasure_stderr": 0.0017495023319111394, "rougeLsum_precision": 0.057472515102994094, "rougeLsum_precision_stderr": 0.0017658517107930081, "rougeLsum_recall": 0.2343449114940915, "rougeLsum_recall_stderr": 0.004180785536759541}, "explicit-graph-description2": {"bleu": 0.005621069280208802, "bleu_stderr": 0.00010070462717428731, "rouge1_fmeasure": 0.03406862468978302, "rouge1_fmeasure_stderr": 0.0007291397354522542, "rouge1_precision": 0.021212341576732085, "rouge1_precision_stderr": 0.0007573290174140969, "rouge1_recall": 0.15394872315339908, "rouge1_recall_stderr": 0.0025907246130094292, "rouge2_fmeasure": 0.0010295704236990917, "rouge2_fmeasure_stderr": 9.29161964969125e-05, "rouge2_precision": 0.0007330327285282334, "rouge2_precision_stderr": 0.00013102839162176146, "rouge2_recall": 0.0060791270967935854, "rouge2_recall_stderr": 0.0005096344336408659, "rougeL_fmeasure": 0.033775131528634185, "rougeL_fmeasure_stderr": 0.0006877591126027594, "rougeL_precision": 0.020816161702046237, "rougeL_precision_stderr": 0.0006635945167961655, "rougeL_recall": 0.15359448098622203, "rougeL_recall_stderr": 0.00258622832573184, "rougeLsum_fmeasure": 0.023579045736592925, "rougeLsum_fmeasure_stderr": 0.0005251315581581948, "rougeLsum_precision": 0.014879313282812858, "rougeLsum_precision_stderr": 0.0006265155007215373, "rougeLsum_recall": 0.11208211458026511, "rougeLsum_recall_stderr": 0.001969123293960845}, "implicit-graph-description": {"bleu": 0.06678968022431767, "bleu_stderr": 0.013291302987332878, "rouge1_fmeasure": 0.04180906676802502, "rouge1_fmeasure_stderr": 0.000892515049715985, "rouge1_precision": 0.029914442023692294, "rouge1_precision_stderr": 0.0013899685699484238, "rouge1_recall": 0.17191853554387962, "rouge1_recall_stderr": 0.0027258688235086617, "rouge2_fmeasure": 0.0043358346468287505, "rouge2_fmeasure_stderr": 0.0003699728130996654, "rouge2_precision": 0.002770987452397337, "rouge2_precision_stderr": 0.00026930245436038245, "rouge2_recall": 0.018458908430645037, "rouge2_recall_stderr": 0.0012055847911475867, "rougeL_fmeasure": 0.04069689961336112, "rougeL_fmeasure_stderr": 0.0008129334087216642, "rougeL_precision": 0.028773567135668157, "rougeL_precision_stderr": 0.0012807801393214825, "rougeL_recall": 0.16962928822415665, "rougeL_recall_stderr": 0.0026788882785198187, "rougeLsum_fmeasure": 0.03168737424504007, "rougeLsum_fmeasure_stderr": 0.0007282033005749001, "rougeLsum_precision": 0.023429395403589064, "rougeLsum_precision_stderr": 0.0012857906320802946, "rougeLsum_recall": 0.13119208466933271, "rougeLsum_recall_stderr": 0.0020710838890581453}, "non-explicit-description": {"bleu": 0.06942821394863977, "bleu_stderr": 0.00721092856708907, "rouge1_fmeasure": 0.054872501275754215, "rouge1_fmeasure_stderr": 0.0011675161117053734, "rouge1_precision": 0.048154071638123803, "rouge1_precision_stderr": 0.0017394492115037211, "rouge1_recall": 0.1603960438769496, "rouge1_recall_stderr": 0.002808863528216255, "rouge2_fmeasure": 0.00644935707690507, "rouge2_fmeasure_stderr": 0.00048277790299858106, "rouge2_precision": 0.0050437692571547055, "rouge2_precision_stderr": 0.00041097690347038956, "rouge2_recall": 0.020874718274915008, "rouge2_recall_stderr": 0.00145782883855474, "rougeL_fmeasure": 0.05035459707061074, "rougeL_fmeasure_stderr": 0.0010577917832862004, "rougeL_precision": 0.044143178163838545, "rougeL_precision_stderr": 0.0015820050437832095, "rougeL_recall": 0.14857144649500334, "rougeL_recall_stderr": 0.0026359059490226556, "rougeLsum_fmeasure": 0.04968291242100845, "rougeLsum_fmeasure_stderr": 0.0010556937198168178, "rougeLsum_precision": 0.04435825561436467, "rougeLsum_precision_stderr": 0.0016624296413482802, "rougeLsum_recall": 0.14581597687921308, "rougeLsum_recall_stderr": 0.0024815309695857325}, "very-explicit-description": {"bleu": 0.70556499870502, "bleu_stderr": 0.07021163423422254, "rouge1_fmeasure": 0.15932996809565791, "rouge1_fmeasure_stderr": 0.0021975666324784045, "rouge1_precision": 0.09839982038939207, "rouge1_precision_stderr": 0.0016034845059760892, "rouge1_recall": 0.557046636180574, "rouge1_recall_stderr": 0.004459443954469606, "rouge2_fmeasure": 0.0645288094675153, "rouge2_fmeasure_stderr": 0.0013271654890972852, "rouge2_precision": 0.03974822393863394, "rouge2_precision_stderr": 0.0009259214471201931, "rouge2_recall": 0.23724700480115124, "rouge2_recall_stderr": 0.003530724556191307, "rougeL_fmeasure": 0.13994234816561368, "rougeL_fmeasure_stderr": 0.001713279737012679, "rougeL_precision": 0.08556676553539082, "rougeL_precision_stderr": 0.0012344258273556512, "rougeL_recall": 0.5120237901085162, "rougeL_recall_stderr": 0.004389079693859156, "rougeLsum_fmeasure": 0.13548370931568984, "rougeLsum_fmeasure_stderr": 0.001965481361266326, "rougeLsum_precision": 0.08372529560944662, "rougeLsum_precision_stderr": 0.001428553657612201, "rougeLsum_recall": 0.47520736805531627, "rougeLsum_recall_stderr": 0.003928372595420955}}, "1": {"PALM_prompt": {"bleu": 0.24093765005017717, "bleu_stderr": 0.0230030873247297, "rouge1_fmeasure": 0.08727381267513917, "rouge1_fmeasure_stderr": 0.002019085465483018, "rouge1_precision": 0.06608421860053589, "rouge1_precision_stderr": 0.002336019334031938, "rouge1_recall": 0.21220436916634114, "rouge1_recall_stderr": 0.004053789065265438, "rouge2_fmeasure": 0.03690045692783099, "rouge2_fmeasure_stderr": 0.0011932344773442297, "rouge2_precision": 0.026958060365485183, "rouge2_precision_stderr": 0.0014325663154050872, "rouge2_recall": 0.09594460677384929, "rouge2_recall_stderr": 0.002636689924376356, "rougeL_fmeasure": 0.08339382486023829, "rougeL_fmeasure_stderr": 0.0018750612270331736, "rougeL_precision": 0.0627130320531275, "rougeL_precision_stderr": 0.0022019609556712485, "rougeL_recall": 0.20593946270610541, "rougeL_recall_stderr": 0.003942128258116945, "rougeLsum_fmeasure": 0.08340965215556247, "rougeLsum_fmeasure_stderr": 0.001888288916759086, "rougeLsum_precision": 0.06314633303729772, "rougeLsum_precision_stderr": 0.00225899090820857, "rougeLsum_recall": 0.20495711011670537, "rougeLsum_recall_stderr": 0.003873468183707786}, "explicit-graph-description2": {"bleu": 0.7928900478653991, "bleu_stderr": 0.13053518516004906, "rouge1_fmeasure": 0.14612427698267197, "rouge1_fmeasure_stderr": 0.0018059192168654533, "rouge1_precision": 0.1725932493840725, "rouge1_precision_stderr": 0.002529386481031639, "rouge1_recall": 0.1604306963623193, "rouge1_recall_stderr": 0.002484650213252913, "rouge2_fmeasure": 0.015048493432535332, "rouge2_fmeasure_stderr": 0.0008510426237769514, "rouge2_precision": 0.018139617142187602, "rouge2_precision_stderr": 0.001178810295076615, "rouge2_recall": 0.017838294270011764, "rouge2_recall_stderr": 0.0010680407233547252, "rougeL_fmeasure": 0.12193728452169696, "rougeL_fmeasure_stderr": 0.0014342614701120422, "rougeL_precision": 0.1452889883810266, "rougeL_precision_stderr": 0.0021500071787494027, "rougeL_recall": 0.13484344118305408, "rougeL_recall_stderr": 0.0020730900587942985, "rougeLsum_fmeasure": 0.12940784544932987, "rougeLsum_fmeasure_stderr": 0.0015587985285077882, "rougeLsum_precision": 0.1538719868843642, "rougeLsum_precision_stderr": 0.0022759439069310527, "rougeLsum_recall": 0.1423996572873628, "rougeLsum_recall_stderr": 0.002170737056924197}, "implicit-graph-description": {"bleu": 0.7343903998488392, "bleu_stderr": 0.1264580334548859, "rouge1_fmeasure": 0.1451954506401401, "rouge1_fmeasure_stderr": 0.0018476593880312048, "rouge1_precision": 0.1713694988441509, "rouge1_precision_stderr": 0.0025683178287328103, "rouge1_recall": 0.1620927633038664, "rouge1_recall_stderr": 0.0026004642336094302, "rouge2_fmeasure": 0.015147654240964752, "rouge2_fmeasure_stderr": 0.0008531589600636162, "rouge2_precision": 0.018149248560522465, "rouge2_precision_stderr": 0.0011980307264669568, "rouge2_recall": 0.018701683687898776, "rouge2_recall_stderr": 0.0011131166018444143, "rougeL_fmeasure": 0.12085106733704139, "rougeL_fmeasure_stderr": 0.0014628495715754606, "rougeL_precision": 0.1438174575375414, "rougeL_precision_stderr": 0.002179050759564507, "rougeL_recall": 0.13648488466088793, "rougeL_recall_stderr": 0.002207259216754237, "rougeLsum_fmeasure": 0.12811642696509545, "rougeLsum_fmeasure_stderr": 0.0015874918061787038, "rougeLsum_precision": 0.1522841828379344, "rougeLsum_precision_stderr": 0.002306578353042297, "rougeLsum_recall": 0.14316734345889626, "rougeLsum_recall_stderr": 0.002243661271505656}, "non-explicit-description": {"bleu": 0.7207771490116592, "bleu_stderr": 0.10258109032487232, "rouge1_fmeasure": 0.15006174394417673, "rouge1_fmeasure_stderr": 0.0019223053607641332, "rouge1_precision": 0.16455416189125624, "rouge1_precision_stderr": 0.0025710885946127505, "rouge1_recall": 0.2181617130834446, "rouge1_recall_stderr": 0.004232534637085028, "rouge2_fmeasure": 0.023945131921722818, "rouge2_fmeasure_stderr": 0.001073921369797315, "rouge2_precision": 0.023103353875520607, "rouge2_precision_stderr": 0.0012429378612023923, "rouge2_recall": 0.051688701593469964, "rouge2_recall_stderr": 0.0024769333300300395, "rougeL_fmeasure": 0.12712565779418603, "rougeL_fmeasure_stderr": 0.0015763640466711865, "rougeL_precision": 0.13925189436630736, "rougeL_precision_stderr": 0.0021920195516649307, "rougeL_recall": 0.19238762602768703, "rougeL_recall_stderr": 0.0040704144827113, "rougeLsum_fmeasure": 0.13222747966833925, "rougeLsum_fmeasure_stderr": 0.001685437840262139, "rougeLsum_precision": 0.14613811038014662, "rougeLsum_precision_stderr": 0.0023432542987913535, "rougeLsum_recall": 0.19219266107645588, "rougeLsum_recall_stderr": 0.0036970487118417475}, "very-explicit-description": {"bleu": 0.7666335106831184, "bleu_stderr": 0.0635865012850457, "rouge1_fmeasure": 0.1795535626460401, "rouge1_fmeasure_stderr": 0.0023078723613743383, "rouge1_precision": 0.15192244328141744, "rouge1_precision_stderr": 0.0026851175239504146, "rouge1_recall": 0.4088076221119547, "rouge1_recall_stderr": 0.005307206147263785, "rouge2_fmeasure": 0.056216955504673816, "rouge2_fmeasure_stderr": 0.0015622457935091263, "rouge2_precision": 0.042264113746990004, "rouge2_precision_stderr": 0.001519132737951407, "rouge2_recall": 0.15816170460414825, "rouge2_recall_stderr": 0.003648850558319156, "rougeL_fmeasure": 0.15827887417485154, "rougeL_fmeasure_stderr": 0.0019506890461920023, "rougeL_precision": 0.13222718563169758, "rougeL_precision_stderr": 0.0023137442474583567, "rougeL_recall": 0.37772895967872394, "rougeL_recall_stderr": 0.005250016280580306, "rougeLsum_fmeasure": 0.15584864004949936, "rougeLsum_fmeasure_stderr": 0.0020620407818320073, "rougeLsum_precision": 0.1331053325871554, "rougeLsum_precision_stderr": 0.0024745486155965113, "rougeLsum_recall": 0.3552035560328879, "rougeLsum_recall_stderr": 0.004588945170502327}}, "2": {"PALM_prompt": {"bleu": 0.233031239019028, "bleu_stderr": 0.018622989143777895, "rouge1_fmeasure": 0.0894179415607547, "rouge1_fmeasure_stderr": 0.0020679188927635165, "rouge1_precision": 0.0669033461265895, "rouge1_precision_stderr": 0.0022457356623633616, "rouge1_recall": 0.2210311245370995, "rouge1_recall_stderr": 0.004136964100813092, "rouge2_fmeasure": 0.03847532772409176, "rouge2_fmeasure_stderr": 0.0011774128694059217, "rouge2_precision": 0.02737441536683152, "rouge2_precision_stderr": 0.0012163404235599126, "rouge2_recall": 0.10228061138551528, "rouge2_recall_stderr": 0.0027171296640774193, "rougeL_fmeasure": 0.08484682008716848, "rougeL_fmeasure_stderr": 0.0018654847610348828, "rougeL_precision": 0.0627740873189878, "rougeL_precision_stderr": 0.0020419307238977585, "rougeL_recall": 0.21409346975466498, "rougeL_recall_stderr": 0.004009271610351055, "rougeLsum_fmeasure": 0.08486627231660553, "rougeLsum_fmeasure_stderr": 0.0018862109332949393, "rougeLsum_precision": 0.06306585978755326, "rougeLsum_precision_stderr": 0.002078016863885293, "rougeLsum_recall": 0.21306042006535458, "rougeLsum_recall_stderr": 0.003959496879716326}, "explicit-graph-description2": {"bleu": 1.035688482397717, "bleu_stderr": 0.07979180056283594, "rouge1_fmeasure": 0.1907465620653806, "rouge1_fmeasure_stderr": 0.0025141994758677614, "rouge1_precision": 0.21086754974453098, "rouge1_precision_stderr": 0.003481087853726294, "rouge1_recall": 0.25038766375814814, "rouge1_recall_stderr": 0.004322057797344624, "rouge2_fmeasure": 0.04229150796312311, "rouge2_fmeasure_stderr": 0.0016292319117673752, "rouge2_precision": 0.046020553876655636, "rouge2_precision_stderr": 0.002081339235092095, "rouge2_recall": 0.06454664623813919, "rouge2_recall_stderr": 0.0024885004130050735, "rougeL_fmeasure": 0.15945985445699104, "rougeL_fmeasure_stderr": 0.00211755282820529, "rougeL_precision": 0.17652392027199135, "rougeL_precision_stderr": 0.003026510309888636, "rougeL_recall": 0.2138167297173281, "rougeL_recall_stderr": 0.003888991530572478, "rougeLsum_fmeasure": 0.16758058695841305, "rougeLsum_fmeasure_stderr": 0.002234265089238999, "rougeLsum_precision": 0.18652878286693028, "rougeLsum_precision_stderr": 0.003173835353071881, "rougeLsum_recall": 0.21970003626695772, "rougeLsum_recall_stderr": 0.003760387533497808}, "implicit-graph-description": {"bleu": 0.8950469728314032, "bleu_stderr": 0.06813864403493262, "rouge1_fmeasure": 0.19598922586079662, "rouge1_fmeasure_stderr": 0.002726474308866776, "rouge1_precision": 0.2245739569864698, "rouge1_precision_stderr": 0.00394768209564399, "rouge1_recall": 0.2605817818040319, "rouge1_recall_stderr": 0.004397225408663707, "rouge2_fmeasure": 0.05131564722525416, "rouge2_fmeasure_stderr": 0.0018242084011574487, "rouge2_precision": 0.05842649937474419, "rouge2_precision_stderr": 0.0025228225800757995, "rouge2_recall": 0.07857813168613167, "rouge2_recall_stderr": 0.002700572476720445, "rougeL_fmeasure": 0.16612890671250619, "rougeL_fmeasure_stderr": 0.002321135524749462, "rougeL_precision": 0.19008397370997976, "rougeL_precision_stderr": 0.003446693936021226, "rougeL_recall": 0.2280794378801942, "rougeL_recall_stderr": 0.004143086530714078, "rougeLsum_fmeasure": 0.17165207996551565, "rougeLsum_fmeasure_stderr": 0.0024000508163840063, "rougeLsum_precision": 0.19875997968955347, "rougeLsum_precision_stderr": 0.003614311300045863, "rougeLsum_recall": 0.22698955916927938, "rougeLsum_recall_stderr": 0.0037471810061475252}, "non-explicit-description": {"bleu": 0.8172765885695147, "bleu_stderr": 0.050751327067539105, "rouge1_fmeasure": 0.18077149568804796, "rouge1_fmeasure_stderr": 0.0024357765771144814, "rouge1_precision": 0.1840518032934248, "rouge1_precision_stderr": 0.0033695422988167393, "rouge1_recall": 0.32294580089178854, "rouge1_recall_stderr": 0.005293640122083205, "rouge2_fmeasure": 0.050683857012723176, "rouge2_fmeasure_stderr": 0.0016201788737892754, "rouge2_precision": 0.048339216717895615, "rouge2_precision_stderr": 0.0021060002829076923, "rouge2_recall": 0.11482202688362075, "rouge2_recall_stderr": 0.0034238195259001535, "rougeL_fmeasure": 0.15676881729864572, "rougeL_fmeasure_stderr": 0.002052846994821611, "rougeL_precision": 0.1584513791779987, "rougeL_precision_stderr": 0.002944143746092905, "rougeL_recall": 0.2930866134885274, "rougeL_recall_stderr": 0.005175150990392834, "rougeLsum_fmeasure": 0.15894468731506023, "rougeLsum_fmeasure_stderr": 0.0021537616662293205, "rougeLsum_precision": 0.163518547765267, "rougeLsum_precision_stderr": 0.003098199168660185, "rougeLsum_recall": 0.28299351899056346, "rougeLsum_recall_stderr": 0.004588174178628674}, "very-explicit-description": {"bleu": 0.8036503769411473, "bleu_stderr": 0.08352144767185858, "rouge1_fmeasure": 0.1995262410573067, "rouge1_fmeasure_stderr": 0.0024883401069483438, "rouge1_precision": 0.18082568461374282, "rouge1_precision_stderr": 0.003639459051365589, "rouge1_recall": 0.4264948458628402, "rouge1_recall_stderr": 0.004935119334379477, "rouge2_fmeasure": 0.07054805798392726, "rouge2_fmeasure_stderr": 0.0016830190110589496, "rouge2_precision": 0.06212406576427143, "rouge2_precision_stderr": 0.0023869401244636375, "rouge2_recall": 0.17467748933722366, "rouge2_recall_stderr": 0.003536521311814631, "rougeL_fmeasure": 0.1770227263859978, "rougeL_fmeasure_stderr": 0.0020868398943825295, "rougeL_precision": 0.15912923447907845, "rougeL_precision_stderr": 0.0032345958330729694, "rougeL_recall": 0.39493138391122135, "rougeL_recall_stderr": 0.004915557812492545, "rougeLsum_fmeasure": 0.1741607140832361, "rougeLsum_fmeasure_stderr": 0.0022309499919646618, "rougeLsum_precision": 0.15978841140185437, "rougeLsum_precision_stderr": 0.0033957422368174534, "rougeLsum_recall": 0.37244471989166417, "rougeLsum_recall_stderr": 0.004290031616626638}}, "3": {"PALM_prompt": {"bleu": 0.23948204833652226, "bleu_stderr": 0.02987129954951102, "rouge1_fmeasure": 0.08516966035997384, "rouge1_fmeasure_stderr": 0.0019653410958603845, "rouge1_precision": 0.06118912078285516, "rouge1_precision_stderr": 0.0018874100598924987, "rouge1_recall": 0.21690381058073044, "rouge1_recall_stderr": 0.004037786222557515, "rouge2_fmeasure": 0.036941223370484465, "rouge2_fmeasure_stderr": 0.0011451903532238477, "rouge2_precision": 0.025691103456522035, "rouge2_precision_stderr": 0.0009761929133606694, "rouge2_recall": 0.09913747741149387, "rouge2_recall_stderr": 0.0026775514547381393, "rougeL_fmeasure": 0.08113455126274569, "rougeL_fmeasure_stderr": 0.0017918361111978508, "rougeL_precision": 0.057680973751972266, "rougeL_precision_stderr": 0.001710672833760511, "rougeL_recall": 0.21034092384649808, "rougeL_recall_stderr": 0.003897565257297013, "rougeLsum_fmeasure": 0.08135704576559996, "rougeLsum_fmeasure_stderr": 0.001809777621388229, "rougeLsum_precision": 0.058077399704829655, "rougeLsum_precision_stderr": 0.0017441871631557635, "rougeLsum_recall": 0.2102632726423538, "rougeLsum_recall_stderr": 0.003885092717560431}, "explicit-graph-description2": {"bleu": 1.4760612258997623, "bleu_stderr": 0.1344204674137375, "rouge1_fmeasure": 0.2155827425525286, "rouge1_fmeasure_stderr": 0.0028813429865885256, "rouge1_precision": 0.24945289061580536, "rouge1_precision_stderr": 0.004234785605850604, "rouge1_recall": 0.27695089051506727, "rouge1_recall_stderr": 0.004409164518913342, "rouge2_fmeasure": 0.05870693940495815, "rouge2_fmeasure_stderr": 0.0019495378220205084, "rouge2_precision": 0.06789221743946505, "rouge2_precision_stderr": 0.002570766391550549, "rouge2_recall": 0.08374766031914743, "rouge2_recall_stderr": 0.002770294903947266, "rougeL_fmeasure": 0.1799141924288802, "rougeL_fmeasure_stderr": 0.0023927111472121488, "rougeL_precision": 0.20844479328303336, "rougeL_precision_stderr": 0.0036153889991536138, "rougeL_recall": 0.2362160338231045, "rougeL_recall_stderr": 0.003930516775517815, "rougeLsum_fmeasure": 0.18810543014252795, "rougeLsum_fmeasure_stderr": 0.00247964907125972, "rougeLsum_precision": 0.21975278899164405, "rougeLsum_precision_stderr": 0.0038037794972502884, "rougeLsum_recall": 0.24068798176788908, "rougeLsum_recall_stderr": 0.0037486405001691237}, "implicit-graph-description": {"bleu": 1.0328999153691525, "bleu_stderr": 0.15218614003775394, "rouge1_fmeasure": 0.21578406129162228, "rouge1_fmeasure_stderr": 0.003014899947915645, "rouge1_precision": 0.2561698960860146, "rouge1_precision_stderr": 0.004586388937239763, "rouge1_recall": 0.2889121821334627, "rouge1_recall_stderr": 0.004530658302121611, "rouge2_fmeasure": 0.06387420889427227, "rouge2_fmeasure_stderr": 0.0020202475334694754, "rouge2_precision": 0.0756534526059316, "rouge2_precision_stderr": 0.0028609493658018873, "rouge2_recall": 0.09619008514491978, "rouge2_recall_stderr": 0.0029113141102096165, "rougeL_fmeasure": 0.1831681982588611, "rougeL_fmeasure_stderr": 0.0025326526566856545, "rougeL_precision": 0.2162947950404591, "rougeL_precision_stderr": 0.003924794592166209, "rougeL_recall": 0.25398159222153943, "rougeL_recall_stderr": 0.004265191984478587, "rougeLsum_fmeasure": 0.18805417860233245, "rougeLsum_fmeasure_stderr": 0.0026019608331974494, "rougeLsum_precision": 0.22494600706563603, "rougeLsum_precision_stderr": 0.00410385917148987, "rougeLsum_recall": 0.25165737151714396, "rougeLsum_recall_stderr": 0.0038988712792617612}, "non-explicit-description": {"bleu": 0.8232200037553864, "bleu_stderr": 0.0990062331795447, "rouge1_fmeasure": 0.20493909153322515, "rouge1_fmeasure_stderr": 0.002712950527519467, "rouge1_precision": 0.21339019477940072, "rouge1_precision_stderr": 0.004136854182002759, "rouge1_recall": 0.36172056828283894, "rouge1_recall_stderr": 0.005192684637361719, "rouge2_fmeasure": 0.06763126769213473, "rouge2_fmeasure_stderr": 0.0019242155202766913, "rouge2_precision": 0.06875941454459712, "rouge2_precision_stderr": 0.0027123104753735013, "rouge2_recall": 0.14026102787918196, "rouge2_recall_stderr": 0.0035390177764360073, "rougeL_fmeasure": 0.178978835556707, "rougeL_fmeasure_stderr": 0.0023052454882151385, "rougeL_precision": 0.18497245590804035, "rougeL_precision_stderr": 0.0036161696226493217, "rougeL_recall": 0.32950137116051886, "rougeL_recall_stderr": 0.00509054750559144, "rougeLsum_fmeasure": 0.17901076394504797, "rougeLsum_fmeasure_stderr": 0.0023948023589502633, "rougeLsum_precision": 0.18870247477597205, "rougeLsum_precision_stderr": 0.0037949931460892973, "rougeLsum_recall": 0.31437773260575974, "rougeLsum_recall_stderr": 0.004468573983043156}, "very-explicit-description": {"bleu": 0.9213985772786043, "bleu_stderr": 0.09261357714373215, "rouge1_fmeasure": 0.20998314436048413, "rouge1_fmeasure_stderr": 0.0026958272587435566, "rouge1_precision": 0.20161323138180814, "rouge1_precision_stderr": 0.00419614102063665, "rouge1_recall": 0.4284437489377629, "rouge1_recall_stderr": 0.004747351313475394, "rouge2_fmeasure": 0.07655162918992574, "rouge2_fmeasure_stderr": 0.001828909076942692, "rouge2_precision": 0.07075454828433567, "rouge2_precision_stderr": 0.0025432659534373814, "rouge2_recall": 0.17982051325515352, "rouge2_recall_stderr": 0.0035838481085783518, "rougeL_fmeasure": 0.1863768106489585, "rougeL_fmeasure_stderr": 0.002259606335187374, "rougeL_precision": 0.17689528139425809, "rougeL_precision_stderr": 0.0036676387595752816, "rougeL_recall": 0.39708608920535615, "rougeL_recall_stderr": 0.004751490642713908, "rougeLsum_fmeasure": 0.18356302820432102, "rougeLsum_fmeasure_stderr": 0.0024163227573974415, "rougeLsum_precision": 0.1782191584681922, "rougeLsum_precision_stderr": 0.0038632898410200503, "rougeLsum_recall": 0.37361714826137415, "rougeLsum_recall_stderr": 0.004151105270083309}}, "4": {"PALM_prompt": {"bleu": 0.23249910495908727, "bleu_stderr": 0.029070914247746167, "rouge1_fmeasure": 0.08752577226961719, "rouge1_fmeasure_stderr": 0.001988775968059262, "rouge1_precision": 0.06371462011124131, "rouge1_precision_stderr": 0.0019856815389907496, "rouge1_recall": 0.2217109306879308, "rouge1_recall_stderr": 0.003947138405833181, "rouge2_fmeasure": 0.03752454514172853, "rouge2_fmeasure_stderr": 0.001129399736645277, "rouge2_precision": 0.026831790674440324, "rouge2_precision_stderr": 0.001147287521474585, "rouge2_recall": 0.10130367868890543, "rouge2_recall_stderr": 0.0025744699017443726, "rougeL_fmeasure": 0.08315980831393469, "rougeL_fmeasure_stderr": 0.0017913382350644002, "rougeL_precision": 0.05966276178114979, "rougeL_precision_stderr": 0.0017451674391315488, "rougeL_recall": 0.21514059835508997, "rougeL_recall_stderr": 0.0038265964816655085, "rougeLsum_fmeasure": 0.08355071339347263, "rougeLsum_fmeasure_stderr": 0.0018285792853000794, "rougeLsum_precision": 0.060216714540077834, "rougeLsum_precision_stderr": 0.0017985198514999731, "rougeLsum_recall": 0.21496362989260864, "rougeLsum_recall_stderr": 0.0038038383785956613}, "explicit-graph-description2": {"bleu": 1.4525772102533077, "bleu_stderr": 0.09991573236218197, "rouge1_fmeasure": 0.23277564344319238, "rouge1_fmeasure_stderr": 0.003033324883659851, "rouge1_precision": 0.2843113295309021, "rouge1_precision_stderr": 0.004668202372322337, "rouge1_recall": 0.2840533638331491, "rouge1_recall_stderr": 0.004212420357643946, "rouge2_fmeasure": 0.06941913517884618, "rouge2_fmeasure_stderr": 0.0021154296895269154, "rouge2_precision": 0.0855143860050401, "rouge2_precision_stderr": 0.0028864190680247532, "rouge2_recall": 0.09058538636969998, "rouge2_recall_stderr": 0.002732539141359347, "rougeL_fmeasure": 0.1969438842373623, "rougeL_fmeasure_stderr": 0.002591786476274177, "rougeL_precision": 0.24061329765622666, "rougeL_precision_stderr": 0.0040548049932821436, "rougeL_recall": 0.2451732467444115, "rougeL_recall_stderr": 0.003800935008924143, "rougeLsum_fmeasure": 0.20445884584732335, "rougeLsum_fmeasure_stderr": 0.002660931367820124, "rougeLsum_precision": 0.2513996234316726, "rougeLsum_precision_stderr": 0.004211644541437699, "rougeLsum_recall": 0.24901463870097312, "rougeLsum_recall_stderr": 0.0036333762169696126}, "implicit-graph-description": {"bleu": 1.1848438797934058, "bleu_stderr": 0.10113709897455576, "rouge1_fmeasure": 0.23507855170285297, "rouge1_fmeasure_stderr": 0.003052960609273728, "rouge1_precision": 0.29191494117074934, "rouge1_precision_stderr": 0.0048753981466979806, "rouge1_recall": 0.2982482419805292, "rouge1_recall_stderr": 0.004362410145983633, "rouge2_fmeasure": 0.07429307318653183, "rouge2_fmeasure_stderr": 0.00210574121245154, "rouge2_precision": 0.09264180506664611, "rouge2_precision_stderr": 0.0030441647048234763, "rouge2_recall": 0.10366534795774687, "rouge2_recall_stderr": 0.002886282274864994, "rougeL_fmeasure": 0.20109163102106464, "rougeL_fmeasure_stderr": 0.002579325282186586, "rougeL_precision": 0.24944159316145484, "rougeL_precision_stderr": 0.004252142505917989, "rougeL_recall": 0.26299891956132837, "rougeL_recall_stderr": 0.004120440439329194, "rougeLsum_fmeasure": 0.2054154975263721, "rougeLsum_fmeasure_stderr": 0.0026590981957875476, "rougeLsum_precision": 0.25743050877782325, "rougeLsum_precision_stderr": 0.004415034565833493, "rougeLsum_recall": 0.25969769735732884, "rougeLsum_recall_stderr": 0.003739952618361054}, "non-explicit-description": {"bleu": 0.9445938185251443, "bleu_stderr": 0.09197403951561334, "rouge1_fmeasure": 0.2219276041114797, "rouge1_fmeasure_stderr": 0.003043064081835907, "rouge1_precision": 0.24073524945767655, "rouge1_precision_stderr": 0.004707514658959234, "rouge1_recall": 0.37222777341340585, "rouge1_recall_stderr": 0.005100896049800429, "rouge2_fmeasure": 0.07863230130745369, "rouge2_fmeasure_stderr": 0.002070687205307558, "rouge2_precision": 0.08343573004395873, "rouge2_precision_stderr": 0.0029901511610586507, "rouge2_recall": 0.1518671437094746, "rouge2_recall_stderr": 0.0036105628962266956, "rougeL_fmeasure": 0.19296499339169593, "rougeL_fmeasure_stderr": 0.0025313363149249185, "rougeL_precision": 0.2082283461845528, "rougeL_precision_stderr": 0.004101899636825203, "rougeL_recall": 0.3382840179138503, "rougeL_recall_stderr": 0.005009409498199744, "rougeLsum_fmeasure": 0.19372279394640554, "rougeLsum_fmeasure_stderr": 0.0026569278397281402, "rougeLsum_precision": 0.21271916694519305, "rougeLsum_precision_stderr": 0.00429809264467102, "rougeLsum_recall": 0.32505704280562625, "rougeLsum_recall_stderr": 0.004417521948317753}, "very-explicit-description": {"bleu": 0.8669579841221637, "bleu_stderr": 0.08180503550292748, "rouge1_fmeasure": 0.2203882766391892, "rouge1_fmeasure_stderr": 0.003092911335667608, "rouge1_precision": 0.22590230879356132, "rouge1_precision_stderr": 0.004885739171030689, "rouge1_recall": 0.4183617818367607, "rouge1_recall_stderr": 0.004646737629595512, "rouge2_fmeasure": 0.08291524084048744, "rouge2_fmeasure_stderr": 0.001997647183245828, "rouge2_precision": 0.08369577119049203, "rouge2_precision_stderr": 0.0029217645456625545, "rouge2_recall": 0.17612573754542205, "rouge2_recall_stderr": 0.0035387797965567075, "rougeL_fmeasure": 0.19451918196713988, "rougeL_fmeasure_stderr": 0.0025549062189435443, "rougeL_precision": 0.19700967528722899, "rougeL_precision_stderr": 0.00420918433484521, "rougeL_recall": 0.3860053232564878, "rougeL_recall_stderr": 0.004602487402565482, "rougeLsum_fmeasure": 0.1912941284472393, "rougeLsum_fmeasure_stderr": 0.00269200231433118, "rougeLsum_precision": 0.19774103481256436, "rougeLsum_precision_stderr": 0.004390815504231839, "rougeLsum_recall": 0.3641519676371665, "rougeLsum_recall_stderr": 0.004085150565177179}}, "5": {"PALM_prompt": {"bleu": 0.22081286066426478, "bleu_stderr": 0.02274325142571731, "rouge1_fmeasure": 0.09147989131946721, "rouge1_fmeasure_stderr": 0.002205390270555857, "rouge1_precision": 0.06624132704477384, "rouge1_precision_stderr": 0.002139644596424952, "rouge1_recall": 0.22692456733410507, "rouge1_recall_stderr": 0.004091333470382997, "rouge2_fmeasure": 0.03903350582230713, "rouge2_fmeasure_stderr": 0.00122131545348737, "rouge2_precision": 0.02768138829423821, "rouge2_precision_stderr": 0.0011951077136256075, "rouge2_recall": 0.10373515662428917, "rouge2_recall_stderr": 0.0027182295029531743, "rougeL_fmeasure": 0.0861662328266173, "rougeL_fmeasure_stderr": 0.0019549257073660665, "rougeL_precision": 0.06135390440151855, "rougeL_precision_stderr": 0.001830142897495819, "rougeL_recall": 0.2187629645915043, "rougeL_recall_stderr": 0.003916988813854803, "rougeLsum_fmeasure": 0.0861577572212862, "rougeLsum_fmeasure_stderr": 0.0019790861267728415, "rougeLsum_precision": 0.06154502173900534, "rougeLsum_precision_stderr": 0.0018661465944842495, "rougeLsum_recall": 0.21792767487533674, "rougeLsum_recall_stderr": 0.0038849552847416537}, "explicit-graph-description2": {"bleu": 1.5535486678139987, "bleu_stderr": 0.12636178897739148, "rouge1_fmeasure": 0.24428760561857069, "rouge1_fmeasure_stderr": 0.0032770994487209613, "rouge1_precision": 0.3079953699411845, "rouge1_precision_stderr": 0.0052262700820691855, "rouge1_recall": 0.2990488948523553, "rouge1_recall_stderr": 0.004288889844947112, "rouge2_fmeasure": 0.07628545628688663, "rouge2_fmeasure_stderr": 0.0022936944399277277, "rouge2_precision": 0.09878776942732348, "rouge2_precision_stderr": 0.003295722862390014, "rouge2_recall": 0.09931014156182742, "rouge2_recall_stderr": 0.00285802935368016, "rougeL_fmeasure": 0.20682176993182852, "rougeL_fmeasure_stderr": 0.002772758722894698, "rougeL_precision": 0.26091647796513057, "rougeL_precision_stderr": 0.004518606930149643, "rougeL_recall": 0.25924988114075476, "rougeL_recall_stderr": 0.003916835233501827, "rougeLsum_fmeasure": 0.21367093331703443, "rougeLsum_fmeasure_stderr": 0.0028579521145505734, "rougeLsum_precision": 0.2723426426961616, "rougeLsum_precision_stderr": 0.004751841751952048, "rougeLsum_recall": 0.26102854194007674, "rougeLsum_recall_stderr": 0.0037065483496949393}, "implicit-graph-description": {"bleu": 1.5291942430627856, "bleu_stderr": 0.12407405811462165, "rouge1_fmeasure": 0.25074445308374155, "rouge1_fmeasure_stderr": 0.003280258168301159, "rouge1_precision": 0.31817552785971276, "rouge1_precision_stderr": 0.005410918975610136, "rouge1_recall": 0.3120825384968243, "rouge1_recall_stderr": 0.004423006280320729, "rouge2_fmeasure": 0.08476444821349621, "rouge2_fmeasure_stderr": 0.002405571742153227, "rouge2_precision": 0.11020791501317337, "rouge2_precision_stderr": 0.0037693744168018546, "rouge2_recall": 0.114958415897581, "rouge2_recall_stderr": 0.003095157786907941, "rougeL_fmeasure": 0.21469809323475145, "rougeL_fmeasure_stderr": 0.002784872235569171, "rougeL_precision": 0.2709202785200124, "rougeL_precision_stderr": 0.004714816857530811, "rougeL_recall": 0.2753613124108194, "rougeL_recall_stderr": 0.004151926334002586, "rougeLsum_fmeasure": 0.21938893610824683, "rougeLsum_fmeasure_stderr": 0.0028754942341362322, "rougeLsum_precision": 0.28086204710118207, "rougeLsum_precision_stderr": 0.004933792503425808, "rougeLsum_recall": 0.27229001474697245, "rougeLsum_recall_stderr": 0.003806628830915923}, "non-explicit-description": {"bleu": 1.0828952105973466, "bleu_stderr": 0.08428338494461286, "rouge1_fmeasure": 0.23144425717217612, "rouge1_fmeasure_stderr": 0.0031085174926612394, "rouge1_precision": 0.26082598406654584, "rouge1_precision_stderr": 0.0051359481387504655, "rouge1_recall": 0.3729649784159858, "rouge1_recall_stderr": 0.004970827450327891, "rouge2_fmeasure": 0.08355597806644839, "rouge2_fmeasure_stderr": 0.002189706487328105, "rouge2_precision": 0.09576839794606369, "rouge2_precision_stderr": 0.0035521748356882993, "rouge2_recall": 0.1532169603638125, "rouge2_recall_stderr": 0.003593558920823611, "rougeL_fmeasure": 0.2012929842528386, "rougeL_fmeasure_stderr": 0.002610242315697286, "rougeL_precision": 0.22583373330234002, "rougeL_precision_stderr": 0.004534237669240004, "rougeL_recall": 0.3374628358280029, "rougeL_recall_stderr": 0.004847122676128511, "rougeLsum_fmeasure": 0.2020118383660613, "rougeLsum_fmeasure_stderr": 0.002720953194833289, "rougeLsum_precision": 0.23006368905799984, "rougeLsum_precision_stderr": 0.00470015880813854, "rougeLsum_recall": 0.3255358190359428, "rougeLsum_recall_stderr": 0.004336770681537931}, "very-explicit-description": {"bleu": 0.949825770028968, "bleu_stderr": 0.07260355364195568, "rouge1_fmeasure": 0.221868544853033, "rouge1_fmeasure_stderr": 0.003062043218874345, "rouge1_precision": 0.2333099109106658, "rouge1_precision_stderr": 0.004918167379356697, "rouge1_recall": 0.40468387245463144, "rouge1_recall_stderr": 0.004615290964853879, "rouge2_fmeasure": 0.08265916020277501, "rouge2_fmeasure_stderr": 0.001971954733869372, "rouge2_precision": 0.08575749645339896, "rouge2_precision_stderr": 0.0029475165939869268, "rouge2_recall": 0.16978922632954452, "rouge2_recall_stderr": 0.00346238061542852, "rougeL_fmeasure": 0.19660756094703621, "rougeL_fmeasure_stderr": 0.0025597130950611555, "rougeL_precision": 0.20506533085409256, "rougeL_precision_stderr": 0.004283642129477899, "rougeL_recall": 0.37349518444433055, "rougeL_recall_stderr": 0.004568965644534639, "rougeLsum_fmeasure": 0.19418171975741957, "rougeLsum_fmeasure_stderr": 0.002689265498940047, "rougeLsum_precision": 0.20613211140017276, "rougeLsum_precision_stderr": 0.004444626909561573, "rougeLsum_recall": 0.3539709992804558, "rougeLsum_recall_stderr": 0.004068457607282714}}}, "GEM/wiki_lingua_en": {"0": {"article_summary_en": {"bleu": 2.526005202191506, "bleu_stderr": 0.058273418733684156, "rouge1_fmeasure": 0.23668156796351772, "rouge1_fmeasure_stderr": 0.0017834284452177681, "rouge1_precision": 0.1975977152505175, "rouge1_precision_stderr": 0.0019210439152713556, "rouge1_recall": 0.3571450155403217, "rouge1_recall_stderr": 0.0027159219787335877, "rouge2_fmeasure": 0.056425445419599346, "rouge2_fmeasure_stderr": 0.0009610968832196708, "rouge2_precision": 0.04679995732393755, "rouge2_precision_stderr": 0.0008453000647757492, "rouge2_recall": 0.08872427020642143, "rouge2_recall_stderr": 0.001757359421073499, "rougeL_fmeasure": 0.15899265084812766, "rougeL_fmeasure_stderr": 0.001137037008281727, "rougeL_precision": 0.13079025220025553, "rougeL_precision_stderr": 0.001163188240810311, "rougeL_recall": 0.24818548845368613, "rougeL_recall_stderr": 0.002206255979336452, "rougeLsum_fmeasure": 0.21937178453510964, "rougeLsum_fmeasure_stderr": 0.0016467565432194853, "rougeLsum_precision": 0.18296614395904412, "rougeLsum_precision_stderr": 0.001771259927022808, "rougeLsum_recall": 0.3318205839417274, "rougeLsum_recall_stderr": 0.0025527759940675494}, "rephrase_en": {"bleu": 0.6145809763628965, "bleu_stderr": 0.05797766316597433, "rouge1_fmeasure": 0.10814475341505056, "rouge1_fmeasure_stderr": 0.001524767202301828, "rouge1_precision": 0.09557902324303133, "rouge1_precision_stderr": 0.0015934233011554208, "rouge1_recall": 0.15195533247270118, "rouge1_recall_stderr": 0.002167076868910249, "rouge2_fmeasure": 0.011588030659270141, "rouge2_fmeasure_stderr": 0.0004784186900067079, "rouge2_precision": 0.010021499848992905, "rouge2_precision_stderr": 0.00042254787804722635, "rouge2_recall": 0.0175643734135993, "rouge2_recall_stderr": 0.0008555885514606594, "rougeL_fmeasure": 0.09167513906886186, "rougeL_fmeasure_stderr": 0.0012095215627801854, "rougeL_precision": 0.08020042698947122, "rougeL_precision_stderr": 0.0012460897241632469, "rougeL_recall": 0.13117713414918547, "rougeL_recall_stderr": 0.0018429649355817298, "rougeLsum_fmeasure": 0.10198753546847392, "rougeLsum_fmeasure_stderr": 0.0014211405382431829, "rougeLsum_precision": 0.08997576030672756, "rougeLsum_precision_stderr": 0.0014864753127451467, "rougeLsum_recall": 0.1439070530072875, "rougeLsum_recall_stderr": 0.002057912841397234}, "summarize_above_en": {"bleu": 0.2622610605101529, "bleu_stderr": 0.040142958508706195, "rouge1_fmeasure": 0.06398795450213274, "rouge1_fmeasure_stderr": 0.0014079644213715252, "rouge1_precision": 0.06083058515622332, "rouge1_precision_stderr": 0.0015146719263047607, "rouge1_recall": 0.08130079272174061, "rouge1_recall_stderr": 0.0017565791128464651, "rouge2_fmeasure": 0.005239039550783603, "rouge2_fmeasure_stderr": 0.0003252538524207895, "rouge2_precision": 0.0050127471219516785, "rouge2_precision_stderr": 0.00033788530642644006, "rouge2_recall": 0.006932802618765814, "rouge2_recall_stderr": 0.00047568921018848896, "rougeL_fmeasure": 0.05958527780779474, "rougeL_fmeasure_stderr": 0.0012805893801553889, "rougeL_precision": 0.056280510609653965, "rougeL_precision_stderr": 0.0013608121285239516, "rougeL_recall": 0.07622653073256713, "rougeL_recall_stderr": 0.0016225806985423702, "rougeLsum_fmeasure": 0.05971017630902041, "rougeLsum_fmeasure_stderr": 0.0013030395843137727, "rougeLsum_precision": 0.05661921621032954, "rougeLsum_precision_stderr": 0.001398882606899158, "rougeLsum_recall": 0.0762057149480866, "rougeLsum_recall_stderr": 0.00164425094307365}, "tldr_en": {"bleu": 0.7280055452213583, "bleu_stderr": 0.06972461776949417, "rouge1_fmeasure": 0.11655773215808052, "rouge1_fmeasure_stderr": 0.0015844131287312152, "rouge1_precision": 0.10367122573898108, "rouge1_precision_stderr": 0.0016311453272566847, "rouge1_recall": 0.1621848426933469, "rouge1_recall_stderr": 0.002239905291843337, "rouge2_fmeasure": 0.014011218070534905, "rouge2_fmeasure_stderr": 0.0005398251030136181, "rouge2_precision": 0.012276074546298633, "rouge2_precision_stderr": 0.0005003323251677242, "rouge2_recall": 0.020862324606727274, "rouge2_recall_stderr": 0.0009624161724708909, "rougeL_fmeasure": 0.10161863320310922, "rougeL_fmeasure_stderr": 0.0012820750167481308, "rougeL_precision": 0.08966835876675931, "rougeL_precision_stderr": 0.00130510256531558, "rougeL_recall": 0.14342974130461322, "rougeL_recall_stderr": 0.0019375952453362063, "rougeLsum_fmeasure": 0.10956185275309588, "rougeLsum_fmeasure_stderr": 0.0014665536918047877, "rougeLsum_precision": 0.09728691738451277, "rougeLsum_precision_stderr": 0.0015087459211237015, "rougeLsum_recall": 0.1530554381860451, "rougeLsum_recall_stderr": 0.0021142656537031777}, "write_abstract_en": {"bleu": 0.8397297265679019, "bleu_stderr": 0.06419556092589017, "rouge1_fmeasure": 0.11770828296503491, "rouge1_fmeasure_stderr": 0.0017536210085080694, "rouge1_precision": 0.11295901916552518, "rouge1_precision_stderr": 0.0021561636775618825, "rouge1_recall": 0.15716194752418464, "rouge1_recall_stderr": 0.0023658263657045466, "rouge2_fmeasure": 0.016689036256012268, "rouge2_fmeasure_stderr": 0.0006284511806433984, "rouge2_precision": 0.015052684287067302, "rouge2_precision_stderr": 0.0005896104288986676, "rouge2_recall": 0.023031044736046974, "rouge2_recall_stderr": 0.0009581139045387344, "rougeL_fmeasure": 0.09961737449184858, "rougeL_fmeasure_stderr": 0.001336830173088306, "rougeL_precision": 0.09584165693939237, "rougeL_precision_stderr": 0.001826823689459275, "rougeL_recall": 0.13488328000528105, "rougeL_recall_stderr": 0.0019094926140183217, "rougeLsum_fmeasure": 0.10939848979449732, "rougeLsum_fmeasure_stderr": 0.001611104432360681, "rougeLsum_precision": 0.10435726439378455, "rougeLsum_precision_stderr": 0.0019288664257451583, "rougeLsum_recall": 0.14671369444409713, "rougeLsum_recall_stderr": 0.0022093912088684674}}, "1": {"article_summary_en": {"bleu": 1.7313590804890209, "bleu_stderr": 0.05908907418295776, "rouge1_fmeasure": 0.1625536760242897, "rouge1_fmeasure_stderr": 0.0019424044371032676, "rouge1_precision": 0.15054373183476955, "rouge1_precision_stderr": 0.001998147829744307, "rouge1_recall": 0.22938700618092542, "rouge1_recall_stderr": 0.003041552612274185, "rouge2_fmeasure": 0.03048470823212383, "rouge2_fmeasure_stderr": 0.0008439966461249648, "rouge2_precision": 0.026747967720227677, "rouge2_precision_stderr": 0.0008081761640449396, "rouge2_recall": 0.04688223062252883, "rouge2_recall_stderr": 0.001502981154526947, "rougeL_fmeasure": 0.11990114200505439, "rougeL_fmeasure_stderr": 0.0012809983090350478, "rougeL_precision": 0.11130833909157777, "rougeL_precision_stderr": 0.001381727326473868, "rougeL_recall": 0.17251099505438725, "rougeL_recall_stderr": 0.002267024463807693, "rougeLsum_fmeasure": 0.15185020212271533, "rougeLsum_fmeasure_stderr": 0.0017871174330384631, "rougeLsum_precision": 0.14096779097971843, "rougeLsum_precision_stderr": 0.0018582505585400385, "rougeLsum_recall": 0.21419795786755574, "rougeLsum_recall_stderr": 0.00281418674755583}, "rephrase_en": {"bleu": 0.2408792681226857, "bleu_stderr": 0.024130354527193396, "rouge1_fmeasure": 0.0979457603122117, "rouge1_fmeasure_stderr": 0.0011364196004116048, "rouge1_precision": 0.11274337869180198, "rouge1_precision_stderr": 0.0015080376244132586, "rouge1_recall": 0.10983840856878206, "rouge1_recall_stderr": 0.0014551714150494175, "rouge2_fmeasure": 0.003914558098208722, "rouge2_fmeasure_stderr": 0.00023624265329673873, "rouge2_precision": 0.004607065202297954, "rouge2_precision_stderr": 0.00029696477370218157, "rouge2_recall": 0.004443770455530801, "rouge2_recall_stderr": 0.00030921576747671716, "rougeL_fmeasure": 0.07861266088907197, "rougeL_fmeasure_stderr": 0.0008536301328765664, "rougeL_precision": 0.09101884007968876, "rougeL_precision_stderr": 0.001195226939453244, "rougeL_recall": 0.08894440577554404, "rougeL_recall_stderr": 0.001162325195438461, "rougeLsum_fmeasure": 0.09467060993519806, "rougeLsum_fmeasure_stderr": 0.0010886015566558735, "rougeLsum_precision": 0.10885553700903176, "rougeLsum_precision_stderr": 0.0014410450568103732, "rougeLsum_recall": 0.10635049542751197, "rougeLsum_recall_stderr": 0.0014076769390450833}, "summarize_above_en": {"bleu": 0.2382502920735501, "bleu_stderr": 0.02364732083952621, "rouge1_fmeasure": 0.09793700832399474, "rouge1_fmeasure_stderr": 0.0011273790641300194, "rouge1_precision": 0.11326508510637766, "rouge1_precision_stderr": 0.0015009170735602506, "rouge1_recall": 0.1085356303020659, "rouge1_recall_stderr": 0.0014115882108590343, "rouge2_fmeasure": 0.0037395950495648657, "rouge2_fmeasure_stderr": 0.00022671756481262202, "rouge2_precision": 0.004465655727904388, "rouge2_precision_stderr": 0.00028238368212925503, "rouge2_recall": 0.004056065876580251, "rouge2_recall_stderr": 0.0002691375832689169, "rougeL_fmeasure": 0.07862622799687088, "rougeL_fmeasure_stderr": 0.0008488937771314072, "rougeL_precision": 0.09142917649433346, "rougeL_precision_stderr": 0.0011841565139209087, "rougeL_recall": 0.0879166314153642, "rougeL_recall_stderr": 0.0011318568380660808, "rougeLsum_fmeasure": 0.09470526990675743, "rougeLsum_fmeasure_stderr": 0.0010805621439727475, "rougeLsum_precision": 0.1093895229104316, "rougeLsum_precision_stderr": 0.0014332473740733347, "rougeLsum_recall": 0.10513740023129456, "rougeLsum_recall_stderr": 0.0013659288669536467}, "tldr_en": {"bleu": 0.5504071981349774, "bleu_stderr": 0.0561562373694907, "rouge1_fmeasure": 0.10548837365464835, "rouge1_fmeasure_stderr": 0.0012991497483475392, "rouge1_precision": 0.11922048835394526, "rouge1_precision_stderr": 0.0017010312663949007, "rouge1_recall": 0.12373373738142388, "rouge1_recall_stderr": 0.0018173643062502098, "rouge2_fmeasure": 0.007230676056378635, "rouge2_fmeasure_stderr": 0.0004084061340426134, "rouge2_precision": 0.008234339790533517, "rouge2_precision_stderr": 0.0005690033754874188, "rouge2_recall": 0.009356412797919933, "rouge2_recall_stderr": 0.0005938642159151799, "rougeL_fmeasure": 0.0843032809934075, "rougeL_fmeasure_stderr": 0.0009705899345773655, "rougeL_precision": 0.09596744652257369, "rougeL_precision_stderr": 0.001368600668118387, "rougeL_recall": 0.09975162242794412, "rougeL_recall_stderr": 0.0014345772942689061, "rougeLsum_fmeasure": 0.10113335490345467, "rougeLsum_fmeasure_stderr": 0.0012233443711385755, "rougeLsum_precision": 0.11440643074566167, "rougeLsum_precision_stderr": 0.001619972437724354, "rougeLsum_recall": 0.11862725022797178, "rougeLsum_recall_stderr": 0.0017155423340356613}, "write_abstract_en": {"bleu": 0.25575772226089033, "bleu_stderr": 0.03569958145217157, "rouge1_fmeasure": 0.09800099824939931, "rouge1_fmeasure_stderr": 0.0011455058233680163, "rouge1_precision": 0.11280797879908812, "rouge1_precision_stderr": 0.0015176355293584295, "rouge1_recall": 0.10974032684176149, "rouge1_recall_stderr": 0.0014531379840037003, "rouge2_fmeasure": 0.003943433623482069, "rouge2_fmeasure_stderr": 0.0002376042862947667, "rouge2_precision": 0.004669460064109281, "rouge2_precision_stderr": 0.00029342600613201146, "rouge2_recall": 0.0044258058822204596, "rouge2_recall_stderr": 0.0003029421760704375, "rougeL_fmeasure": 0.07857164868748949, "rougeL_fmeasure_stderr": 0.0008607791775228845, "rougeL_precision": 0.09100549212005561, "rougeL_precision_stderr": 0.0012010483184015203, "rougeL_recall": 0.08875940267139816, "rougeL_recall_stderr": 0.0011563650751118756, "rougeLsum_fmeasure": 0.09458732701792698, "rougeLsum_fmeasure_stderr": 0.0010956881578153168, "rougeLsum_precision": 0.10880498670928285, "rougeLsum_precision_stderr": 0.0014497893036322746, "rougeLsum_recall": 0.10606739321335039, "rougeLsum_recall_stderr": 0.001402205534200808}}, "2": {"article_summary_en": {"bleu": 1.5124106778411996, "bleu_stderr": 0.06141046632956911, "rouge1_fmeasure": 0.15659972337101613, "rouge1_fmeasure_stderr": 0.001906663384031481, "rouge1_precision": 0.14664845124249912, "rouge1_precision_stderr": 0.0021074927136243747, "rouge1_recall": 0.22029309843953365, "rouge1_recall_stderr": 0.002950858143159719, "rouge2_fmeasure": 0.028283684834043706, "rouge2_fmeasure_stderr": 0.000821620933532687, "rouge2_precision": 0.025832066720273383, "rouge2_precision_stderr": 0.0008715375341951669, "rouge2_recall": 0.04297038579858274, "rouge2_recall_stderr": 0.0014465923870447984, "rougeL_fmeasure": 0.11924857854706734, "rougeL_fmeasure_stderr": 0.0013235141134357415, "rougeL_precision": 0.11186134311345497, "rougeL_precision_stderr": 0.0015665677693472357, "rougeL_recall": 0.17054780622395707, "rougeL_recall_stderr": 0.002242184695689906, "rougeLsum_fmeasure": 0.14532637028195308, "rougeLsum_fmeasure_stderr": 0.0017544389892467619, "rougeLsum_precision": 0.1363376415997164, "rougeLsum_precision_stderr": 0.0019672230168232964, "rougeLsum_recall": 0.20473556515895666, "rougeLsum_recall_stderr": 0.002733779107620804}, "rephrase_en": {"bleu": 0.42527357697495255, "bleu_stderr": 0.06953287436188474, "rouge1_fmeasure": 0.10129546926820872, "rouge1_fmeasure_stderr": 0.0012376127878624412, "rouge1_precision": 0.11450203486111951, "rouge1_precision_stderr": 0.0015512493201278732, "rouge1_recall": 0.11764026218464517, "rouge1_recall_stderr": 0.0017763168006867999, "rouge2_fmeasure": 0.005443826668587504, "rouge2_fmeasure_stderr": 0.0003144404879778184, "rouge2_precision": 0.005990341232345204, "rouge2_precision_stderr": 0.0003778566735238687, "rouge2_recall": 0.007150893881572016, "rouge2_recall_stderr": 0.0005954055858899312, "rougeL_fmeasure": 0.08046273244596494, "rougeL_fmeasure_stderr": 0.0009128776785211843, "rougeL_precision": 0.09176826957181027, "rougeL_precision_stderr": 0.0012268770230593445, "rougeL_recall": 0.09444076989884408, "rougeL_recall_stderr": 0.0014215142609522416, "rougeLsum_fmeasure": 0.09737556397488158, "rougeLsum_fmeasure_stderr": 0.0011716121889168436, "rougeLsum_precision": 0.11011996762454923, "rougeLsum_precision_stderr": 0.0014803727480206098, "rougeLsum_recall": 0.11312057420513341, "rougeLsum_recall_stderr": 0.0016882892537450165}, "summarize_above_en": {"bleu": 0.590846188853053, "bleu_stderr": 0.08622026318742701, "rouge1_fmeasure": 0.10460998387087274, "rouge1_fmeasure_stderr": 0.0012689330036383434, "rouge1_precision": 0.1195188168282025, "rouge1_precision_stderr": 0.001703506896411307, "rouge1_recall": 0.1202411166531759, "rouge1_recall_stderr": 0.0016720477170315902, "rouge2_fmeasure": 0.006572282621218325, "rouge2_fmeasure_stderr": 0.00036858964749543314, "rouge2_precision": 0.007701756834163109, "rouge2_precision_stderr": 0.0004826378231441523, "rouge2_recall": 0.007828279383704012, "rouge2_recall_stderr": 0.00048735370780030713, "rougeL_fmeasure": 0.08348917343043502, "rougeL_fmeasure_stderr": 0.0009381191407596309, "rougeL_precision": 0.09584725807304968, "rougeL_precision_stderr": 0.0013390205513148664, "rougeL_recall": 0.09711441691325796, "rougeL_recall_stderr": 0.0013255057315948026, "rougeLsum_fmeasure": 0.10076143309185726, "rougeLsum_fmeasure_stderr": 0.0012074937805280613, "rougeLsum_precision": 0.11507270211895362, "rougeLsum_precision_stderr": 0.0016248601270300064, "rougeLsum_recall": 0.11590379555656585, "rougeLsum_recall_stderr": 0.0015972136190990915}, "tldr_en": {"bleu": 1.0951241331559471, "bleu_stderr": 0.06468580769617795, "rouge1_fmeasure": 0.11914801472840628, "rouge1_fmeasure_stderr": 0.0015610543341787234, "rouge1_precision": 0.13659331063329858, "rouge1_precision_stderr": 0.0022487372544365814, "rouge1_recall": 0.14392803155149808, "rouge1_recall_stderr": 0.0021426965551337935, "rouge2_fmeasure": 0.014134158871786717, "rouge2_fmeasure_stderr": 0.000595539839287858, "rouge2_precision": 0.017190323002243903, "rouge2_precision_stderr": 0.0009306182513579381, "rouge2_recall": 0.01804489565450503, "rouge2_recall_stderr": 0.000825364925597016, "rougeL_fmeasure": 0.09639205333227598, "rougeL_fmeasure_stderr": 0.0011890649378743271, "rougeL_precision": 0.11065349886551568, "rougeL_precision_stderr": 0.0018034426665467446, "rougeL_recall": 0.11850505866727387, "rougeL_recall_stderr": 0.0017735732164141282, "rougeLsum_fmeasure": 0.11276308928122987, "rougeLsum_fmeasure_stderr": 0.0014568432793235883, "rougeLsum_precision": 0.12944158114309687, "rougeLsum_precision_stderr": 0.0021224960396573653, "rougeLsum_recall": 0.1364172032563405, "rougeLsum_recall_stderr": 0.002022193400173333}, "write_abstract_en": {"bleu": 0.48504301830637025, "bleu_stderr": 0.05137507780432141, "rouge1_fmeasure": 0.10458659667435967, "rouge1_fmeasure_stderr": 0.0012861835501711226, "rouge1_precision": 0.11515560674851932, "rouge1_precision_stderr": 0.0015509282528847878, "rouge1_recall": 0.12300491373534446, "rouge1_recall_stderr": 0.0017806103612246057, "rouge2_fmeasure": 0.006134109611250491, "rouge2_fmeasure_stderr": 0.0003481002830495667, "rouge2_precision": 0.006395365145636472, "rouge2_precision_stderr": 0.0003591958546304461, "rouge2_recall": 0.0077962010381853256, "rouge2_recall_stderr": 0.0005289307912376033, "rougeL_fmeasure": 0.08203887538980079, "rougeL_fmeasure_stderr": 0.0009229225262216014, "rougeL_precision": 0.09124703921693972, "rougeL_precision_stderr": 0.001206351363616421, "rougeL_recall": 0.09741908894309344, "rougeL_recall_stderr": 0.0013730905688191196, "rougeLsum_fmeasure": 0.10037653634062631, "rougeLsum_fmeasure_stderr": 0.0012231289195270326, "rougeLsum_precision": 0.11059022840815862, "rougeLsum_precision_stderr": 0.0014851063649001237, "rougeLsum_recall": 0.11805167829668524, "rougeLsum_recall_stderr": 0.0016988734128864996}}, "3": {"article_summary_en": {"bleu": 1.3802963074575254, "bleu_stderr": 0.06732152627137003, "rouge1_fmeasure": 0.1258659396327009, "rouge1_fmeasure_stderr": 0.001992963699044068, "rouge1_precision": 0.1237560514705609, "rouge1_precision_stderr": 0.002282039852755364, "rouge1_recall": 0.17609270530427382, "rouge1_recall_stderr": 0.0030210209846962413, "rouge2_fmeasure": 0.021957452841427055, "rouge2_fmeasure_stderr": 0.0007360198417813418, "rouge2_precision": 0.020954462249452663, "rouge2_precision_stderr": 0.0007965730554829935, "rouge2_recall": 0.033657403446598254, "rouge2_recall_stderr": 0.001329371810945043, "rougeL_fmeasure": 0.0981193897316493, "rougeL_fmeasure_stderr": 0.0014598347738579937, "rougeL_precision": 0.09658445599443373, "rougeL_precision_stderr": 0.001753215991240076, "rougeL_recall": 0.13975525526667698, "rougeL_recall_stderr": 0.0023736809100178683, "rougeLsum_fmeasure": 0.11650869217135837, "rougeLsum_fmeasure_stderr": 0.001832893857886487, "rougeLsum_precision": 0.11453614366434078, "rougeLsum_precision_stderr": 0.002105151678823837, "rougeLsum_recall": 0.16338969575022716, "rougeLsum_recall_stderr": 0.0028019873925272075}, "rephrase_en": {"bleu": 0.5943947138668615, "bleu_stderr": 0.04701176872890474, "rouge1_fmeasure": 0.08745924967780058, "rouge1_fmeasure_stderr": 0.00145734596535055, "rouge1_precision": 0.10053229311714834, "rouge1_precision_stderr": 0.0018186300913397432, "rouge1_recall": 0.10555528455488752, "rouge1_recall_stderr": 0.0020869634882788126, "rouge2_fmeasure": 0.0066746059699027156, "rouge2_fmeasure_stderr": 0.000406229568602048, "rouge2_precision": 0.007244189708893669, "rouge2_precision_stderr": 0.00047064519532168336, "rouge2_recall": 0.009481343391595305, "rouge2_recall_stderr": 0.0007029880713328167, "rougeL_fmeasure": 0.06994035855230117, "rougeL_fmeasure_stderr": 0.0011004649845343271, "rougeL_precision": 0.08113908472999296, "rougeL_precision_stderr": 0.0014422772176468318, "rougeL_recall": 0.0854911553393767, "rougeL_recall_stderr": 0.001688161117709379, "rougeLsum_fmeasure": 0.08314051391123733, "rougeLsum_fmeasure_stderr": 0.001363454598942612, "rougeLsum_precision": 0.09576776212352223, "rougeLsum_precision_stderr": 0.0017136097429588115, "rougeLsum_recall": 0.10032211171110292, "rougeLsum_recall_stderr": 0.001963048810667937}, "summarize_above_en": {"bleu": 0.5908356190981445, "bleu_stderr": 0.04489447675590248, "rouge1_fmeasure": 0.08635223714862443, "rouge1_fmeasure_stderr": 0.001447627888207993, "rouge1_precision": 0.10561107711679257, "rouge1_precision_stderr": 0.002135116442535178, "rouge1_recall": 0.10039756771582228, "rouge1_recall_stderr": 0.0018998579014479846, "rouge2_fmeasure": 0.00773667698720356, "rouge2_fmeasure_stderr": 0.00044084771240741574, "rouge2_precision": 0.010532137196295798, "rouge2_precision_stderr": 0.0008099525943800139, "rouge2_recall": 0.009664993083625497, "rouge2_recall_stderr": 0.0006422735345150598, "rougeL_fmeasure": 0.07053215664409358, "rougeL_fmeasure_stderr": 0.0011503059093946877, "rougeL_precision": 0.08698250921011949, "rougeL_precision_stderr": 0.0017942983133012848, "rougeL_recall": 0.08314557123023888, "rougeL_recall_stderr": 0.0015976310654642362, "rougeLsum_fmeasure": 0.08220026138629714, "rougeLsum_fmeasure_stderr": 0.0013594564836760387, "rougeLsum_precision": 0.10076653862384102, "rougeLsum_precision_stderr": 0.002033643570526417, "rougeLsum_recall": 0.09561293861925456, "rougeLsum_recall_stderr": 0.001792928927851378}, "tldr_en": {"bleu": 1.0702430413152522, "bleu_stderr": 0.07113210724258658, "rouge1_fmeasure": 0.10595502188431383, "rouge1_fmeasure_stderr": 0.0017328375397109063, "rouge1_precision": 0.1325368577188755, "rouge1_precision_stderr": 0.0027805256927643865, "rouge1_recall": 0.12731530185422324, "rouge1_recall_stderr": 0.0023277957556830046, "rouge2_fmeasure": 0.015114213677740143, "rouge2_fmeasure_stderr": 0.0006401159920792145, "rouge2_precision": 0.021208434345761078, "rouge2_precision_stderr": 0.0012054076568754222, "rouge2_recall": 0.0189911073417682, "rouge2_recall_stderr": 0.0009097509424826536, "rougeL_fmeasure": 0.08778629502724616, "rougeL_fmeasure_stderr": 0.0013978298599760263, "rougeL_precision": 0.1098153192916977, "rougeL_precision_stderr": 0.0023103747538012406, "rougeL_recall": 0.10713066473487352, "rougeL_recall_stderr": 0.0019653832434662097, "rougeLsum_fmeasure": 0.09922221098825855, "rougeLsum_fmeasure_stderr": 0.001608236044311444, "rougeLsum_precision": 0.12430578995748448, "rougeLsum_precision_stderr": 0.0026077066778884906, "rougeLsum_recall": 0.11938462834076304, "rougeLsum_recall_stderr": 0.002179423844395387}, "write_abstract_en": {"bleu": 0.4671279058356674, "bleu_stderr": 0.046623136526393195, "rouge1_fmeasure": 0.07909868477561151, "rouge1_fmeasure_stderr": 0.0014723495062479318, "rouge1_precision": 0.09166326015675812, "rouge1_precision_stderr": 0.001863618297222859, "rouge1_recall": 0.09439692771782093, "rouge1_recall_stderr": 0.002049639853878195, "rouge2_fmeasure": 0.006138307630609741, "rouge2_fmeasure_stderr": 0.0003968736257003603, "rouge2_precision": 0.006815303592116684, "rouge2_precision_stderr": 0.0004965126210719875, "rouge2_recall": 0.008260177773973776, "rouge2_recall_stderr": 0.0006508910738926214, "rougeL_fmeasure": 0.061996926537050155, "rougeL_fmeasure_stderr": 0.001089486193345418, "rougeL_precision": 0.07314862524882004, "rougeL_precision_stderr": 0.0014905595972838524, "rougeL_recall": 0.07438392591435516, "rougeL_recall_stderr": 0.001576968342669606, "rougeLsum_fmeasure": 0.07526948626877389, "rougeLsum_fmeasure_stderr": 0.0013834299708953759, "rougeLsum_precision": 0.08729866156868725, "rougeLsum_precision_stderr": 0.0017644387577357547, "rougeLsum_recall": 0.08978161538723402, "rougeLsum_recall_stderr": 0.0019225188795508}}, "4": {"article_summary_en": {"bleu": 0.22377593301174895, "bleu_stderr": 0.022223303535369093, "rouge1_fmeasure": 0.04204648300277925, "rouge1_fmeasure_stderr": 0.0016015557898894717, "rouge1_precision": 0.04395368484664138, "rouge1_precision_stderr": 0.0018424926390462097, "rouge1_recall": 0.05774517711149188, "rouge1_recall_stderr": 0.002305158478827759, "rouge2_fmeasure": 0.008202499844200142, "rouge2_fmeasure_stderr": 0.0005608948554894471, "rouge2_precision": 0.00844841454877157, "rouge2_precision_stderr": 0.0006603987371609425, "rouge2_recall": 0.012177055063554206, "rouge2_recall_stderr": 0.0009005039320534978, "rougeL_fmeasure": 0.03339087455481403, "rougeL_fmeasure_stderr": 0.0012365521644087543, "rougeL_precision": 0.035216144797083765, "rougeL_precision_stderr": 0.0014914106117576477, "rougeL_recall": 0.04621071195954536, "rougeL_recall_stderr": 0.001796318755601515, "rougeLsum_fmeasure": 0.0389854987222177, "rougeLsum_fmeasure_stderr": 0.001481057274442113, "rougeLsum_precision": 0.04090439759741698, "rougeLsum_precision_stderr": 0.0017246420675129577, "rougeLsum_recall": 0.05360227908215058, "rougeLsum_recall_stderr": 0.0021376470771195598}, "rephrase_en": {"bleu": 0.06155277062691283, "bleu_stderr": 0.008573159858072549, "rouge1_fmeasure": 0.03034948263830278, "rouge1_fmeasure_stderr": 0.0012288540754404823, "rouge1_precision": 0.035781196323287794, "rouge1_precision_stderr": 0.0015508373183201957, "rouge1_recall": 0.03889017824317372, "rouge1_recall_stderr": 0.0017168088677622815, "rouge2_fmeasure": 0.003519391416781204, "rouge2_fmeasure_stderr": 0.0003344402761329188, "rouge2_precision": 0.00393091834595338, "rouge2_precision_stderr": 0.0004260496428881099, "rouge2_recall": 0.0052037277375677786, "rouge2_recall_stderr": 0.000540702114853635, "rougeL_fmeasure": 0.02470528087251438, "rougeL_fmeasure_stderr": 0.0009707901157133207, "rougeL_precision": 0.02946476725548691, "rougeL_precision_stderr": 0.0012750099020740124, "rougeL_recall": 0.03200016757522908, "rougeL_recall_stderr": 0.0013894967755151602, "rougeLsum_fmeasure": 0.028653137000749736, "rougeLsum_fmeasure_stderr": 0.0011499184993932895, "rougeLsum_precision": 0.03393706130302841, "rougeLsum_precision_stderr": 0.0014660077905268025, "rougeLsum_recall": 0.03664599115059629, "rougeLsum_recall_stderr": 0.0016081833996057495}, "summarize_above_en": {"bleu": 0.029093100164270518, "bleu_stderr": 0.005798545611396172, "rouge1_fmeasure": 0.027402695270076872, "rouge1_fmeasure_stderr": 0.001157724691879728, "rouge1_precision": 0.035705986442768456, "rouge1_precision_stderr": 0.001618640602215546, "rouge1_recall": 0.03197561607719162, "rouge1_recall_stderr": 0.0014717555351632275, "rouge2_fmeasure": 0.00317018932471084, "rouge2_fmeasure_stderr": 0.00031714568787250826, "rouge2_precision": 0.00401479182015976, "rouge2_precision_stderr": 0.00043777035338946497, "rouge2_recall": 0.003920400654913368, "rouge2_recall_stderr": 0.0004172562730537138, "rougeL_fmeasure": 0.022642473398844034, "rougeL_fmeasure_stderr": 0.0009329018940160789, "rougeL_precision": 0.03013013623521446, "rougeL_precision_stderr": 0.0013844671087526951, "rougeL_recall": 0.026445775089489882, "rougeL_recall_stderr": 0.0011917063204159026, "rougeLsum_fmeasure": 0.025956946800132486, "rougeLsum_fmeasure_stderr": 0.0010846296784172967, "rougeLsum_precision": 0.033971789988392904, "rougeLsum_precision_stderr": 0.001538890119076818, "rougeLsum_recall": 0.03041146272732838, "rougeLsum_recall_stderr": 0.0013938550134374181}, "tldr_en": {"bleu": 0.1011859793046015, "bleu_stderr": 0.014350638137120817, "rouge1_fmeasure": 0.03772701558859641, "rouge1_fmeasure_stderr": 0.0014496259811778097, "rouge1_precision": 0.05029331061394819, "rouge1_precision_stderr": 0.002207006374572226, "rouge1_recall": 0.04435204949042938, "rouge1_recall_stderr": 0.0018249482984958308, "rouge2_fmeasure": 0.006349834491065967, "rouge2_fmeasure_stderr": 0.000497796787181232, "rouge2_precision": 0.009550867923430868, "rouge2_precision_stderr": 0.0009379034663216416, "rouge2_recall": 0.00752924590473579, "rouge2_recall_stderr": 0.0006210729065705739, "rougeL_fmeasure": 0.031758388592257075, "rougeL_fmeasure_stderr": 0.0012082854006158594, "rougeL_precision": 0.042547817990021886, "rougeL_precision_stderr": 0.0018896153572281916, "rougeL_recall": 0.03761001948729707, "rougeL_recall_stderr": 0.001533156465356555, "rougeLsum_fmeasure": 0.03520149112819018, "rougeLsum_fmeasure_stderr": 0.0013434925962965214, "rougeLsum_precision": 0.04733936020765825, "rougeLsum_precision_stderr": 0.002088413158037917, "rougeLsum_recall": 0.041282972252420995, "rougeLsum_recall_stderr": 0.0016916980136133567}, "write_abstract_en": {"bleu": 0.004214867695194402, "bleu_stderr": 0.0009834876936663747, "rouge1_fmeasure": 0.019517882288829225, "rouge1_fmeasure_stderr": 0.0009604584223230579, "rouge1_precision": 0.024122525784377926, "rouge1_precision_stderr": 0.001268480469741408, "rouge1_recall": 0.02373843528685194, "rouge1_recall_stderr": 0.0012776937483044686, "rouge2_fmeasure": 0.00167678535423955, "rouge2_fmeasure_stderr": 0.000209849702180494, "rouge2_precision": 0.002090947286237258, "rouge2_precision_stderr": 0.0003051655289255735, "rouge2_recall": 0.0023982591853571687, "rouge2_recall_stderr": 0.0003799771234361843, "rougeL_fmeasure": 0.015788003617005678, "rougeL_fmeasure_stderr": 0.0007534854041442719, "rougeL_precision": 0.019682867450734345, "rougeL_precision_stderr": 0.0010315579522385012, "rougeL_recall": 0.01952315908954502, "rougeL_recall_stderr": 0.0010418891548632763, "rougeLsum_fmeasure": 0.018599345217623072, "rougeLsum_fmeasure_stderr": 0.0009070972782003065, "rougeLsum_precision": 0.023011530370527344, "rougeLsum_precision_stderr": 0.0012021135466980504, "rougeLsum_recall": 0.02274482533911906, "rougeLsum_recall_stderr": 0.0012249850245862933}}, "5": {"article_summary_en": {"bleu": 1.3197166417911808e-08, "bleu_stderr": 2.839866989657345e-08, "rouge1_fmeasure": 0.00624650026917102, "rouge1_fmeasure_stderr": 0.0006896033744358352, "rouge1_precision": 0.006968711039755044, "rouge1_precision_stderr": 0.0008981875218117217, "rouge1_recall": 0.009045805133122158, "rouge1_recall_stderr": 0.0010585291219916539, "rouge2_fmeasure": 0.001354763281978201, "rouge2_fmeasure_stderr": 0.0002354570603752033, "rouge2_precision": 0.00147397688199298, "rouge2_precision_stderr": 0.00031382142309567925, "rouge2_recall": 0.0023038793256885807, "rouge2_recall_stderr": 0.0005038053110111991, "rougeL_fmeasure": 0.004998030899560508, "rougeL_fmeasure_stderr": 0.0005484876409153318, "rougeL_precision": 0.005684552101183715, "rougeL_precision_stderr": 0.0007811869359473511, "rougeL_recall": 0.0073860095468518735, "rougeL_recall_stderr": 0.0008694725694459097, "rougeLsum_fmeasure": 0.005785170257262004, "rougeLsum_fmeasure_stderr": 0.000640536066601862, "rougeLsum_precision": 0.006504958900325744, "rougeLsum_precision_stderr": 0.0008576188233041718, "rougeLsum_recall": 0.008412531590010163, "rougeLsum_recall_stderr": 0.0009903950820560048}, "rephrase_en": {"bleu": 9.384428445264235e-13, "bleu_stderr": 2.048496076777406e-11, "rouge1_fmeasure": 0.004552330658915172, "rouge1_fmeasure_stderr": 0.0005166062646006778, "rouge1_precision": 0.005466518578601791, "rouge1_precision_stderr": 0.0006360451019998415, "rouge1_recall": 0.005849281181457089, "rouge1_recall_stderr": 0.00074547507260093, "rouge2_fmeasure": 0.0005748508715984116, "rouge2_fmeasure_stderr": 0.0001533404190150482, "rouge2_precision": 0.0007290381746496603, "rouge2_precision_stderr": 0.00017635675582582084, "rouge2_recall": 0.0008879185087143063, "rouge2_recall_stderr": 0.0002685233025455508, "rougeL_fmeasure": 0.0037163176139750516, "rougeL_fmeasure_stderr": 0.0004064756060554951, "rougeL_precision": 0.004641407616449875, "rougeL_precision_stderr": 0.0005531411811140234, "rougeL_recall": 0.004667322692384839, "rougeL_recall_stderr": 0.0005697941762332262, "rougeLsum_fmeasure": 0.00434986186087182, "rougeLsum_fmeasure_stderr": 0.00048298225975487925, "rougeLsum_precision": 0.005301656522138449, "rougeLsum_precision_stderr": 0.0006174490006150588, "rougeLsum_recall": 0.005552151354486044, "rougeLsum_recall_stderr": 0.0006921370486199647}, "summarize_above_en": {"bleu": 2.233297948144298e-16, "bleu_stderr": 7.375838022248224e-15, "rouge1_fmeasure": 0.00450639531355031, "rouge1_fmeasure_stderr": 0.000555584129480168, "rouge1_precision": 0.00595043307989505, "rouge1_precision_stderr": 0.000767651406351477, "rouge1_recall": 0.004778481595486118, "rouge1_recall_stderr": 0.0006233197620300213, "rouge2_fmeasure": 0.0007838553827958208, "rouge2_fmeasure_stderr": 0.00021051541322103913, "rouge2_precision": 0.0012396636176066488, "rouge2_precision_stderr": 0.00034778633785854403, "rouge2_recall": 0.0008030145717563384, "rouge2_recall_stderr": 0.00020935765891227752, "rougeL_fmeasure": 0.0039016663853958123, "rougeL_fmeasure_stderr": 0.000485959181689575, "rougeL_precision": 0.005169809582323384, "rougeL_precision_stderr": 0.0006759018705997662, "rougeL_recall": 0.004164169546014226, "rougeL_recall_stderr": 0.0005467863316598697, "rougeLsum_fmeasure": 0.004298574460690419, "rougeLsum_fmeasure_stderr": 0.0005305976692789258, "rougeLsum_precision": 0.005734093038513017, "rougeLsum_precision_stderr": 0.0007492388147769349, "rougeLsum_recall": 0.004562529353283875, "rougeLsum_recall_stderr": 0.00059727108953185}, "tldr_en": {"bleu": 8.867413076733536e-12, "bleu_stderr": 4.796386448449034e-11, "rouge1_fmeasure": 0.006210622496685344, "rouge1_fmeasure_stderr": 0.0006727949372632281, "rouge1_precision": 0.009316160183573745, "rouge1_precision_stderr": 0.0011155532666113311, "rouge1_recall": 0.006701979396490877, "rouge1_recall_stderr": 0.0007675445971480603, "rouge2_fmeasure": 0.0011831626630347113, "rouge2_fmeasure_stderr": 0.0002436110293420105, "rouge2_precision": 0.002346103781743029, "rouge2_precision_stderr": 0.00054243039709031, "rouge2_recall": 0.0012899695832633785, "rouge2_recall_stderr": 0.00031378439187027, "rougeL_fmeasure": 0.005221351425348215, "rougeL_fmeasure_stderr": 0.0005750924567208641, "rougeL_precision": 0.00791303900867605, "rougeL_precision_stderr": 0.0009792864483272245, "rougeL_recall": 0.0056630421132579855, "rougeL_recall_stderr": 0.0006466924999339596, "rougeLsum_fmeasure": 0.005877130570728575, "rougeLsum_fmeasure_stderr": 0.0006401737602435983, "rougeLsum_precision": 0.008929134383929085, "rougeLsum_precision_stderr": 0.0010845799476391675, "rougeLsum_recall": 0.006287352900516465, "rougeLsum_recall_stderr": 0.0007115097244397184}, "write_abstract_en": {"bleu": 1.234466237515543e-32, "bleu_stderr": 8.15926545424835e-28, "rouge1_fmeasure": 0.0019664292916783234, "rouge1_fmeasure_stderr": 0.0003120896330989726, "rouge1_precision": 0.0025439911728014682, "rouge1_precision_stderr": 0.00044089657241181616, "rouge1_recall": 0.002127710352300543, "rouge1_recall_stderr": 0.0003543742353112539, "rouge2_fmeasure": 0.00013934402915387212, "rouge2_fmeasure_stderr": 5.871856589143859e-05, "rouge2_precision": 0.0001923386207866496, "rouge2_precision_stderr": 7.972926456097242e-05, "rouge2_recall": 0.00015798104521609337, "rouge2_recall_stderr": 6.680290787534892e-05, "rougeL_fmeasure": 0.00156729637433548, "rougeL_fmeasure_stderr": 0.00024160981216417036, "rougeL_precision": 0.0020080907200489616, "rougeL_precision_stderr": 0.0003261035212045945, "rougeL_recall": 0.0017331074442032213, "rougeL_recall_stderr": 0.00028920031821567146, "rougeLsum_fmeasure": 0.001910974206326472, "rougeLsum_fmeasure_stderr": 0.0003010647724742626, "rougeLsum_precision": 0.0024784113984497643, "rougeLsum_precision_stderr": 0.0004288948292667904, "rougeLsum_recall": 0.0020662127056030035, "rougeLsum_recall_stderr": 0.00034094966500850676}}}, "anli_r1": {"0": {"GPT-3 style": {"acc": 0.336, "acc_norm": 0.336, "acc_norm_stderr": 0.014944140233795021, "acc_stderr": 0.014944140233795025, "subset": 1}, "MNLI crowdsource": {"acc": 0.334, "acc_norm": 0.33, "acc_norm_stderr": 0.01487687202745673, "acc_stderr": 0.014922019523732954, "subset": 1}, "can we infer": {"acc": 0.34, "acc_norm": 0.329, "acc_norm_stderr": 0.01486539538592836, "acc_stderr": 0.014987482264363937, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.331, "acc_norm": 0.322, "acc_norm_stderr": 0.014782913600996673, "acc_stderr": 0.01488827258820394, "subset": 1}, "justified in saying": {"acc": 0.341, "acc_norm": 0.326, "acc_norm_stderr": 0.014830507204541035, "acc_stderr": 0.014998131348402713, "subset": 1}}, "1": {"GPT-3 style": {"acc": 0.341, "acc_norm": 0.332, "acc_norm_stderr": 0.014899597242811482, "acc_stderr": 0.014998131348402702, "subset": 1}, "MNLI crowdsource": {"acc": 0.333, "acc_norm": 0.333, "acc_norm_stderr": 0.014910846164229863, "acc_stderr": 0.014910846164229863, "subset": 1}, "can we infer": {"acc": 0.33, "acc_norm": 0.343, "acc_norm_stderr": 0.015019206922356953, "acc_stderr": 0.014876872027456734, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.333, "acc_norm": 0.333, "acc_norm_stderr": 0.014910846164229863, "acc_stderr": 0.014910846164229863, "subset": 1}, "justified in saying": {"acc": 0.332, "acc_norm": 0.337, "acc_norm_stderr": 0.014955087918653603, "acc_stderr": 0.014899597242811483, "subset": 1}}, "2": {"GPT-3 style": {"acc": 0.363, "acc_norm": 0.355, "acc_norm_stderr": 0.015139491543780529, "acc_stderr": 0.015213890444671287, "subset": 1}, "MNLI crowdsource": {"acc": 0.359, "acc_norm": 0.357, "acc_norm_stderr": 0.015158521721486764, "acc_stderr": 0.01517726422479859, "subset": 1}, "can we infer": {"acc": 0.351, "acc_norm": 0.35, "acc_norm_stderr": 0.015090650341444236, "acc_stderr": 0.015100563798316405, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.347, "acc_norm": 0.348, "acc_norm_stderr": 0.01507060460376841, "acc_stderr": 0.01506047203170662, "subset": 1}, "justified in saying": {"acc": 0.357, "acc_norm": 0.361, "acc_norm_stderr": 0.01519572011817511, "acc_stderr": 0.015158521721486767, "subset": 1}}, "3": {"GPT-3 style": {"acc": 0.357, "acc_norm": 0.352, "acc_norm_stderr": 0.015110404505648663, "acc_stderr": 0.015158521721486769, "subset": 1}, "MNLI crowdsource": {"acc": 0.349, "acc_norm": 0.341, "acc_norm_stderr": 0.01499813134840271, "acc_stderr": 0.015080663991563098, "subset": 1}, "can we infer": {"acc": 0.363, "acc_norm": 0.354, "acc_norm_stderr": 0.015129868238451772, "acc_stderr": 0.015213890444671285, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.347, "acc_norm": 0.35, "acc_norm_stderr": 0.015090650341444236, "acc_stderr": 0.015060472031706618, "subset": 1}, "justified in saying": {"acc": 0.351, "acc_norm": 0.363, "acc_norm_stderr": 0.015213890444671285, "acc_stderr": 0.015100563798316402, "subset": 1}}, "4": {"GPT-3 style": {"acc": 0.363, "acc_norm": 0.356, "acc_norm_stderr": 0.015149042659306623, "acc_stderr": 0.015213890444671283, "subset": 1}, "MNLI crowdsource": {"acc": 0.342, "acc_norm": 0.339, "acc_norm_stderr": 0.014976758771620335, "acc_stderr": 0.01500870618212173, "subset": 1}, "can we infer": {"acc": 0.363, "acc_norm": 0.351, "acc_norm_stderr": 0.015100563798316403, "acc_stderr": 0.01521389044467128, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.344, "acc_norm": 0.343, "acc_norm_stderr": 0.015019206922356951, "acc_stderr": 0.015029633724408948, "subset": 1}, "justified in saying": {"acc": 0.362, "acc_norm": 0.352, "acc_norm_stderr": 0.015110404505648663, "acc_stderr": 0.015204840912919496, "subset": 1}}, "5": {"GPT-3 style": {"acc": 0.351, "acc_norm": 0.339, "acc_norm_stderr": 0.014976758771620347, "acc_stderr": 0.015100563798316402, "subset": 1}, "MNLI crowdsource": {"acc": 0.34, "acc_norm": 0.332, "acc_norm_stderr": 0.014899597242811483, "acc_stderr": 0.014987482264363937, "subset": 1}, "can we infer": {"acc": 0.355, "acc_norm": 0.351, "acc_norm_stderr": 0.015100563798316402, "acc_stderr": 0.015139491543780529, "subset": 1}, "guaranteed/possible/impossible": {"acc": 0.335, "acc_norm": 0.336, "acc_norm_stderr": 0.014944140233795023, "acc_stderr": 0.014933117490932575, "subset": 1}, "justified in saying": {"acc": 0.35, "acc_norm": 0.349, "acc_norm_stderr": 0.015080663991563104, "acc_stderr": 0.015090650341444233, "subset": 1}}}, "anli_r2": {"0": {"GPT-3 style": {"acc": 0.342, "acc_norm": 0.354, "acc_norm_stderr": 0.015129868238451775, "acc_stderr": 0.015008706182121734, "subset": 2}, "MNLI crowdsource": {"acc": 0.334, "acc_norm": 0.335, "acc_norm_stderr": 0.014933117490932577, "acc_stderr": 0.014922019523732958, "subset": 2}, "can we infer": {"acc": 0.336, "acc_norm": 0.345, "acc_norm_stderr": 0.015039986742055242, "acc_stderr": 0.01494414023379502, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.329, "acc_norm": 0.336, "acc_norm_stderr": 0.014944140233795025, "acc_stderr": 0.014865395385928362, "subset": 2}, "justified in saying": {"acc": 0.335, "acc_norm": 0.353, "acc_norm_stderr": 0.01512017260548369, "acc_stderr": 0.014933117490932577, "subset": 2}}, "1": {"GPT-3 style": {"acc": 0.318, "acc_norm": 0.317, "acc_norm_stderr": 0.014721675438880217, "acc_stderr": 0.014734079309311901, "subset": 2}, "MNLI crowdsource": {"acc": 0.315, "acc_norm": 0.315, "acc_norm_stderr": 0.014696631960792506, "acc_stderr": 0.014696631960792506, "subset": 2}, "can we infer": {"acc": 0.331, "acc_norm": 0.33, "acc_norm_stderr": 0.01487687202745673, "acc_stderr": 0.014888272588203928, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.315, "acc_norm": 0.315, "acc_norm_stderr": 0.014696631960792506, "acc_stderr": 0.014696631960792506, "subset": 2}, "justified in saying": {"acc": 0.322, "acc_norm": 0.326, "acc_norm_stderr": 0.014830507204541033, "acc_stderr": 0.014782913600996674, "subset": 2}}, "2": {"GPT-3 style": {"acc": 0.322, "acc_norm": 0.321, "acc_norm_stderr": 0.014770821817934649, "acc_stderr": 0.014782913600996683, "subset": 2}, "MNLI crowdsource": {"acc": 0.321, "acc_norm": 0.338, "acc_norm_stderr": 0.01496596071022447, "acc_stderr": 0.01477082181793465, "subset": 2}, "can we infer": {"acc": 0.327, "acc_norm": 0.341, "acc_norm_stderr": 0.014998131348402718, "acc_stderr": 0.014842213153411237, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.326, "acc_norm": 0.326, "acc_norm_stderr": 0.014830507204541031, "acc_stderr": 0.01483050720454103, "subset": 2}, "justified in saying": {"acc": 0.331, "acc_norm": 0.333, "acc_norm_stderr": 0.014910846164229876, "acc_stderr": 0.014888272588203924, "subset": 2}}, "3": {"GPT-3 style": {"acc": 0.329, "acc_norm": 0.325, "acc_norm_stderr": 0.014818724459095524, "acc_stderr": 0.014865395385928373, "subset": 2}, "MNLI crowdsource": {"acc": 0.309, "acc_norm": 0.311, "acc_norm_stderr": 0.014645596385722694, "acc_stderr": 0.014619600977206491, "subset": 2}, "can we infer": {"acc": 0.332, "acc_norm": 0.326, "acc_norm_stderr": 0.014830507204541031, "acc_stderr": 0.014899597242811492, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.321, "acc_norm": 0.321, "acc_norm_stderr": 0.014770821817934649, "acc_stderr": 0.014770821817934649, "subset": 2}, "justified in saying": {"acc": 0.325, "acc_norm": 0.322, "acc_norm_stderr": 0.014782913600996683, "acc_stderr": 0.014818724459095526, "subset": 2}}, "4": {"GPT-3 style": {"acc": 0.333, "acc_norm": 0.32, "acc_norm_stderr": 0.014758652303574881, "acc_stderr": 0.014910846164229875, "subset": 2}, "MNLI crowdsource": {"acc": 0.291, "acc_norm": 0.3, "acc_norm_stderr": 0.014498627873361425, "acc_stderr": 0.014370995982377939, "subset": 2}, "can we infer": {"acc": 0.321, "acc_norm": 0.331, "acc_norm_stderr": 0.01488827258820393, "acc_stderr": 0.014770821817934652, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.313, "acc_norm": 0.314, "acc_norm_stderr": 0.014683991951087973, "acc_stderr": 0.014671272822977883, "subset": 2}, "justified in saying": {"acc": 0.314, "acc_norm": 0.32, "acc_norm_stderr": 0.014758652303574888, "acc_stderr": 0.014683991951087974, "subset": 2}}, "5": {"GPT-3 style": {"acc": 0.34, "acc_norm": 0.338, "acc_norm_stderr": 0.014965960710224472, "acc_stderr": 0.014987482264363935, "subset": 2}, "MNLI crowdsource": {"acc": 0.325, "acc_norm": 0.321, "acc_norm_stderr": 0.014770821817934645, "acc_stderr": 0.014818724459095524, "subset": 2}, "can we infer": {"acc": 0.331, "acc_norm": 0.338, "acc_norm_stderr": 0.01496596071022447, "acc_stderr": 0.014888272588203924, "subset": 2}, "guaranteed/possible/impossible": {"acc": 0.314, "acc_norm": 0.312, "acc_norm_stderr": 0.014658474370509012, "acc_stderr": 0.014683991951087973, "subset": 2}, "justified in saying": {"acc": 0.329, "acc_norm": 0.325, "acc_norm_stderr": 0.014818724459095524, "acc_stderr": 0.014865395385928373, "subset": 2}}}, "anli_r3": {"0": {"GPT-3 style": {"acc": 0.32916666666666666, "acc_norm": 0.325, "acc_norm_stderr": 0.013526454480351028, "acc_stderr": 0.013570806258433625, "subset": 3}, "MNLI crowdsource": {"acc": 0.335, "acc_norm": 0.3433333333333333, "acc_norm_stderr": 0.01371263383046586, "acc_stderr": 0.013630871843821476, "subset": 3}, "can we infer": {"acc": 0.3475, "acc_norm": 0.3408333333333333, "acc_norm_stderr": 0.01368860079329693, "acc_stderr": 0.013751753243291852, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.33166666666666667, "acc_norm": 0.31416666666666665, "acc_norm_stderr": 0.0134053993149841, "acc_stderr": 0.013596836729485166, "subset": 3}, "justified in saying": {"acc": 0.3475, "acc_norm": 0.3408333333333333, "acc_norm_stderr": 0.013688600793296934, "acc_stderr": 0.013751753243291852, "subset": 3}}, "1": {"GPT-3 style": {"acc": 0.35, "acc_norm": 0.3375, "acc_norm_stderr": 0.013655897185463652, "acc_stderr": 0.013774667009018558, "subset": 3}, "MNLI crowdsource": {"acc": 0.33666666666666667, "acc_norm": 0.33666666666666667, "acc_norm_stderr": 0.013647602942406393, "acc_stderr": 0.013647602942406393, "subset": 3}, "can we infer": {"acc": 0.3525, "acc_norm": 0.34, "acc_norm_stderr": 0.013680495725767797, "acc_stderr": 0.01379716491891836, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.33666666666666667, "acc_norm": 0.33666666666666667, "acc_norm_stderr": 0.013647602942406393, "acc_stderr": 0.013647602942406393, "subset": 3}, "justified in saying": {"acc": 0.3383333333333333, "acc_norm": 0.33, "acc_norm_stderr": 0.013579531277800922, "acc_stderr": 0.013664144006618268, "subset": 3}}, "2": {"GPT-3 style": {"acc": 0.33666666666666667, "acc_norm": 0.3325, "acc_norm_stderr": 0.013605417345710528, "acc_stderr": 0.013647602942406387, "subset": 3}, "MNLI crowdsource": {"acc": 0.32666666666666666, "acc_norm": 0.32916666666666666, "acc_norm_stderr": 0.013570806258433633, "acc_stderr": 0.013544340907003665, "subset": 3}, "can we infer": {"acc": 0.335, "acc_norm": 0.33, "acc_norm_stderr": 0.013579531277800917, "acc_stderr": 0.013630871843821469, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.32166666666666666, "acc_norm": 0.32, "acc_norm_stderr": 0.013471620929769142, "acc_stderr": 0.013490095282989521, "subset": 3}, "justified in saying": {"acc": 0.3258333333333333, "acc_norm": 0.33, "acc_norm_stderr": 0.013579531277800917, "acc_stderr": 0.01353542204341746, "subset": 3}}, "3": {"GPT-3 style": {"acc": 0.3516666666666667, "acc_norm": 0.3566666666666667, "acc_norm_stderr": 0.013833742805050722, "acc_stderr": 0.013789711695404785, "subset": 3}, "MNLI crowdsource": {"acc": 0.33916666666666667, "acc_norm": 0.325, "acc_norm_stderr": 0.013526454480351018, "acc_stderr": 0.013672343491681808, "subset": 3}, "can we infer": {"acc": 0.345, "acc_norm": 0.3375, "acc_norm_stderr": 0.013655897185463664, "acc_stderr": 0.013728421539454876, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.3416666666666667, "acc_norm": 0.3425, "acc_norm_stderr": 0.013704669762934728, "acc_stderr": 0.013696658778002515, "subset": 3}, "justified in saying": {"acc": 0.335, "acc_norm": 0.32916666666666666, "acc_norm_stderr": 0.013570806258433625, "acc_stderr": 0.01363087184382147, "subset": 3}}, "4": {"GPT-3 style": {"acc": 0.3466666666666667, "acc_norm": 0.3283333333333333, "acc_norm_stderr": 0.013562032919529019, "acc_stderr": 0.013744022550571946, "subset": 3}, "MNLI crowdsource": {"acc": 0.3225, "acc_norm": 0.32, "acc_norm_stderr": 0.013471620929769139, "acc_stderr": 0.013499258621103249, "subset": 3}, "can we infer": {"acc": 0.32666666666666666, "acc_norm": 0.31, "acc_norm_stderr": 0.013356596331200263, "acc_stderr": 0.013544340907003663, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.33416666666666667, "acc_norm": 0.3308333333333333, "acc_norm_stderr": 0.013588208070708993, "acc_stderr": 0.01362243481313678, "subset": 3}, "justified in saying": {"acc": 0.3258333333333333, "acc_norm": 0.3175, "acc_norm_stderr": 0.013443538681348052, "acc_stderr": 0.013535422043417462, "subset": 3}}, "5": {"GPT-3 style": {"acc": 0.3325, "acc_norm": 0.33916666666666667, "acc_norm_stderr": 0.01367234349168182, "acc_stderr": 0.013605417345710526, "subset": 3}, "MNLI crowdsource": {"acc": 0.3258333333333333, "acc_norm": 0.31333333333333335, "acc_norm_stderr": 0.013395739415639082, "acc_stderr": 0.013535422043417464, "subset": 3}, "can we infer": {"acc": 0.3175, "acc_norm": 0.32666666666666666, "acc_norm_stderr": 0.013544340907003663, "acc_stderr": 0.013443538681348052, "subset": 3}, "guaranteed/possible/impossible": {"acc": 0.31166666666666665, "acc_norm": 0.31083333333333335, "acc_norm_stderr": 0.01336645784596543, "acc_stderr": 0.013376268790982096, "subset": 3}, "justified in saying": {"acc": 0.31333333333333335, "acc_norm": 0.32166666666666666, "acc_norm_stderr": 0.013490095282989521, "acc_stderr": 0.013395739415639082, "subset": 3}}}, "arc_easy": {"0": {"heres_a_problem": {"acc": 0.22013651877133106, "acc_norm": 0.22013651877133106, "acc_norm_stderr": 0.01210812488346098, "acc_stderr": 0.01210812488346098}, "i_am_hesitating": {"acc": 0.2098976109215017, "acc_norm": 0.2619453924914676, "acc_norm_stderr": 0.012849054826858117, "acc_stderr": 0.011900548748047446}, "multiple_choice": {"acc": 0.2235494880546075, "acc_norm": 0.27303754266211605, "acc_norm_stderr": 0.013019332762635734, "acc_stderr": 0.012174896631202612}, "pick_the_most_correct_option": {"acc": 0.2478956228956229, "acc_norm": 0.2478956228956229, "acc_norm_stderr": 0.008860162361464027, "acc_stderr": 0.008860162361464027}, "qa_options": {"acc": 0.20477815699658702, "acc_norm": 0.2619453924914676, "acc_norm_stderr": 0.012849054826858117, "acc_stderr": 0.0117925443385134}}, "1": {"heres_a_problem": {"acc": 0.2478956228956229, "acc_norm": 0.2478956228956229, "acc_norm_stderr": 0.008860162361464028, "acc_stderr": 0.008860162361464028}, "i_am_hesitating": {"acc": 0.2828282828282828, "acc_norm": 0.2840909090909091, "acc_norm_stderr": 0.009253921261885763, "acc_stderr": 0.009241472775328228}, "multiple_choice": {"acc": 0.2781986531986532, "acc_norm": 0.27525252525252525, "acc_norm_stderr": 0.009164888895174743, "acc_stderr": 0.009195059601583901}, "pick_the_most_correct_option": {"acc": 0.25, "acc_norm": 0.25, "acc_norm_stderr": 0.008885233166386385, "acc_stderr": 0.008885233166386385}, "qa_options": {"acc": 0.2098976109215017, "acc_norm": 0.2525597269624573, "acc_norm_stderr": 0.01269672898020771, "acc_stderr": 0.011900548748047452}}, "2": {"heres_a_problem": {"acc": 0.26136363636363635, "acc_norm": 0.26136363636363635, "acc_norm_stderr": 0.00901583836660821, "acc_stderr": 0.00901583836660821}, "i_am_hesitating": {"acc": 0.2150170648464164, "acc_norm": 0.2645051194539249, "acc_norm_stderr": 0.012889272949313364, "acc_stderr": 0.012005717634133608}, "multiple_choice": {"acc": 0.2760942760942761, "acc_norm": 0.2718855218855219, "acc_norm_stderr": 0.009129795867310489, "acc_stderr": 0.009173559873835262}, "pick_the_most_correct_option": {"acc": 0.26641414141414144, "acc_norm": 0.26641414141414144, "acc_norm_stderr": 0.009071357971078681, "acc_stderr": 0.009071357971078681}, "qa_options": {"acc": 0.21160409556313994, "acc_norm": 0.2636518771331058, "acc_norm_stderr": 0.012875929151297056, "acc_stderr": 0.011935916358632863}}, "3": {"heres_a_problem": {"acc": 0.2508532423208191, "acc_norm": 0.2508532423208191, "acc_norm_stderr": 0.012668198621315433, "acc_stderr": 0.012668198621315433}, "i_am_hesitating": {"acc": 0.2150170648464164, "acc_norm": 0.2645051194539249, "acc_norm_stderr": 0.012889272949313364, "acc_stderr": 0.012005717634133608}, "multiple_choice": {"acc": 0.2697811447811448, "acc_norm": 0.2748316498316498, "acc_norm_stderr": 0.009160538115254952, "acc_stderr": 0.009107527914671064}, "pick_the_most_correct_option": {"acc": 0.2380546075085324, "acc_norm": 0.2380546075085324, "acc_norm_stderr": 0.012445770028026206, "acc_stderr": 0.012445770028026206}, "qa_options": {"acc": 0.2090443686006826, "acc_norm": 0.26706484641638223, "acc_norm_stderr": 0.01292893319649635, "acc_stderr": 0.011882746987406448}}, "4": {"heres_a_problem": {"acc": 0.2593856655290102, "acc_norm": 0.2593856655290102, "acc_norm_stderr": 0.012808273573927094, "acc_stderr": 0.012808273573927094}, "i_am_hesitating": {"acc": 0.22525597269624573, "acc_norm": 0.26791808873720135, "acc_norm_stderr": 0.012942030195136425, "acc_stderr": 0.0122078399954073}, "multiple_choice": {"acc": 0.26262626262626265, "acc_norm": 0.26052188552188554, "acc_norm_stderr": 0.009006435890336588, "acc_stderr": 0.00902986177676375}, "pick_the_most_correct_option": {"acc": 0.24914675767918087, "acc_norm": 0.24914675767918087, "acc_norm_stderr": 0.012639407111926435, "acc_stderr": 0.012639407111926435}, "qa_options": {"acc": 0.22866894197952217, "acc_norm": 0.27047781569965873, "acc_norm_stderr": 0.012980954547659556, "acc_stderr": 0.012272853582540807}}, "5": {"heres_a_problem": {"acc": 0.2622053872053872, "acc_norm": 0.2622053872053872, "acc_norm_stderr": 0.009025197991724831, "acc_stderr": 0.009025197991724831}, "i_am_hesitating": {"acc": 0.27441077441077444, "acc_norm": 0.27525252525252525, "acc_norm_stderr": 0.009164888895174743, "acc_stderr": 0.009156177122244532}, "multiple_choice": {"acc": 0.2030716723549488, "acc_norm": 0.24658703071672355, "acc_norm_stderr": 0.01259572626879013, "acc_stderr": 0.011755899303705582}, "pick_the_most_correct_option": {"acc": 0.2440273037542662, "acc_norm": 0.2440273037542662, "acc_norm_stderr": 0.012551447627856255, "acc_stderr": 0.012551447627856255}, "qa_options": {"acc": 0.2760942760942761, "acc_norm": 0.2786195286195286, "acc_norm_stderr": 0.00919932919502635, "acc_stderr": 0.009173559873835264}}}, "boolq": {"0": {"GPT-3 Style": {"acc": 0.5703333333333334, "acc_norm": 0.6243333333333333, "acc_norm_stderr": 0.008843442555522142, "acc_stderr": 0.009039449353930876}, "after_reading": {"acc": 0.6236666666666667, "acc_norm": 0.5433333333333333, "acc_norm_stderr": 0.009095877403306734, "acc_stderr": 0.008846558976258922}, "exercise": {"acc": 0.6233333333333333, "acc_norm": 0.38233333333333336, "acc_norm_stderr": 0.008873806022763182, "acc_stderr": 0.00884811049411477}, "valid_binary": {"acc": 0.61, "acc_norm": 0.4523333333333333, "acc_norm_stderr": 0.009088646624339617, "acc_stderr": 0.008906539291295217}, "yes_no_question": {"acc": 0.624, "acc_norm": 0.6236666666666667, "acc_norm_stderr": 0.008846558976258922, "acc_stderr": 0.008845002997512754}}, "1": {"GPT-3 Style": {"acc": 0.536, "acc_norm": 0.576, "acc_norm_stderr": 0.009024142344197916, "acc_stderr": 0.009106534814375936}, "after_reading": {"acc": 0.575, "acc_norm": 0.5653333333333334, "acc_norm_stderr": 0.009051951785603833, "acc_stderr": 0.00902693165837962}, "exercise": {"acc": 0.5423333333333333, "acc_norm": 0.541, "acc_norm_stderr": 0.009099483512819305, "acc_stderr": 0.009097447488896774}, "valid_binary": {"acc": 0.5586666666666666, "acc_norm": 0.5546666666666666, "acc_norm_stderr": 0.009075496684215473, "acc_stderr": 0.009067164858263418}, "yes_no_question": {"acc": 0.5476666666666666, "acc_norm": 0.57, "acc_norm_stderr": 0.009040312075041282, "acc_stderr": 0.009088646624339617}}, "2": {"GPT-3 Style": {"acc": 0.5546666666666666, "acc_norm": 0.5843333333333334, "acc_norm_stderr": 0.00899942365775079, "acc_stderr": 0.009075496684215473}, "after_reading": {"acc": 0.593, "acc_norm": 0.5863333333333334, "acc_norm_stderr": 0.008993097934268066, "acc_stderr": 0.00897090625594853}, "exercise": {"acc": 0.5223333333333333, "acc_norm": 0.5156666666666667, "acc_norm_stderr": 0.009125748094153247, "acc_stderr": 0.009121118663627244}, "valid_binary": {"acc": 0.5856666666666667, "acc_norm": 0.5746666666666667, "acc_norm_stderr": 0.009027853030468718, "acc_stderr": 0.008995223478188036}, "yes_no_question": {"acc": 0.579, "acc_norm": 0.5933333333333334, "acc_norm_stderr": 0.008969751860881005, "acc_stderr": 0.009015547313638383}}, "3": {"GPT-3 Style": {"acc": 0.5446666666666666, "acc_norm": 0.5836666666666667, "acc_norm_stderr": 0.009001498317147612, "acc_stderr": 0.009093726495969151}, "after_reading": {"acc": 0.5793333333333334, "acc_norm": 0.571, "acc_norm_stderr": 0.00903771136639389, "acc_stderr": 0.00901457125468042}, "exercise": {"acc": 0.5453333333333333, "acc_norm": 0.5353333333333333, "acc_norm_stderr": 0.009107405418833937, "acc_stderr": 0.009092626403553739}, "valid_binary": {"acc": 0.5883333333333334, "acc_norm": 0.573, "acc_norm_stderr": 0.009032396953831094, "acc_stderr": 0.008986619341172333}, "yes_no_question": {"acc": 0.5733333333333334, "acc_norm": 0.597, "acc_norm_stderr": 0.008956770693005616, "acc_stderr": 0.009031496556538196}}, "4": {"GPT-3 Style": {"acc": 0.551, "acc_norm": 0.58, "acc_norm_stderr": 0.009012606487132153, "acc_stderr": 0.009082611478924378}, "after_reading": {"acc": 0.5696666666666667, "acc_norm": 0.5556666666666666, "acc_norm_stderr": 0.009073469569248527, "acc_stderr": 0.009041170615977855}, "exercise": {"acc": 0.5423333333333333, "acc_norm": 0.5323333333333333, "acc_norm_stderr": 0.009111120825274597, "acc_stderr": 0.009097447488896775}, "valid_binary": {"acc": 0.581, "acc_norm": 0.5746666666666667, "acc_norm_stderr": 0.009027853030468722, "acc_stderr": 0.009009627690997004}, "yes_no_question": {"acc": 0.575, "acc_norm": 0.5986666666666667, "acc_norm_stderr": 0.008950698369218387, "acc_stderr": 0.009026931658379624}}, "5": {"GPT-3 Style": {"acc": 0.5473333333333333, "acc_norm": 0.5796666666666667, "acc_norm_stderr": 0.009013590979636827, "acc_stderr": 0.009089227499483243}, "after_reading": {"acc": 0.5663333333333334, "acc_norm": 0.5553333333333333, "acc_norm_stderr": 0.009074149407514857, "acc_stderr": 0.009049526374650807}, "exercise": {"acc": 0.543, "acc_norm": 0.5323333333333333, "acc_norm_stderr": 0.009111120825274599, "acc_stderr": 0.009096404868252823}, "valid_binary": {"acc": 0.582, "acc_norm": 0.575, "acc_norm_stderr": 0.00902693165837962, "acc_stderr": 0.009006610887558775}, "yes_no_question": {"acc": 0.5706666666666667, "acc_norm": 0.5973333333333334, "acc_norm_stderr": 0.008955564831687461, "acc_stderr": 0.009038582451449428}}}, "cb": {"0": {"GPT-3 style": {"acc": 0.4107142857142857, "acc_stderr": 0.06633634150359538, "f1": 0.1940928270042194}, "MNLI crowdsource": {"acc": 0.4107142857142857, "acc_stderr": 0.06633634150359538, "f1": 0.1940928270042194}, "can we infer": {"acc": 0.5714285714285714, "acc_stderr": 0.06672848092813058, "f1": 0.3983682983682984}, "guaranteed/possible/impossible": {"acc": 0.14285714285714285, "acc_stderr": 0.0471841613625583, "f1": 0.11815661815661815}, "justified in saying": {"acc": 0.6071428571428571, "acc_stderr": 0.0658538889806635, "f1": 0.4233100233100233}}, "1": {"GPT-3 style": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.27686156921539234}, "MNLI crowdsource": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.2842025699168556}, "can we infer": {"acc": 0.42857142857142855, "acc_stderr": 0.06672848092813058, "f1": 0.29449152542372886}, "guaranteed/possible/impossible": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.2842025699168556}, "justified in saying": {"acc": 0.44642857142857145, "acc_stderr": 0.06703189227942398, "f1": 0.31149301825993553}}, "2": {"GPT-3 style": {"acc": 0.44642857142857145, "acc_stderr": 0.06703189227942398, "f1": 0.2956393200295639}, "MNLI crowdsource": {"acc": 0.4107142857142857, "acc_stderr": 0.06633634150359541, "f1": 0.28245294474802674}, "can we infer": {"acc": 0.5, "acc_stderr": 0.06741998624632421, "f1": 0.34717335209946043}, "guaranteed/possible/impossible": {"acc": 0.375, "acc_stderr": 0.06527912098338669, "f1": 0.2567567567567568}, "justified in saying": {"acc": 0.48214285714285715, "acc_stderr": 0.0673769750864465, "f1": 0.33543859649122804}}, "3": {"GPT-3 style": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.2596899224806202}, "MNLI crowdsource": {"acc": 0.39285714285714285, "acc_stderr": 0.06585388898066351, "f1": 0.27449576321756775}, "can we infer": {"acc": 0.5, "acc_stderr": 0.06741998624632421, "f1": 0.34491725768321513}, "guaranteed/possible/impossible": {"acc": 0.35714285714285715, "acc_stderr": 0.06460957383809221, "f1": 0.22072072072072071}, "justified in saying": {"acc": 0.44642857142857145, "acc_stderr": 0.067031892279424, "f1": 0.3083804143126177}}, "4": {"GPT-3 style": {"acc": 0.375, "acc_stderr": 0.06527912098338669, "f1": 0.23582089552238802}, "MNLI crowdsource": {"acc": 0.4107142857142857, "acc_stderr": 0.06633634150359541, "f1": 0.28319209039548027}, "can we infer": {"acc": 0.5, "acc_stderr": 0.06741998624632421, "f1": 0.3485060690943043}, "guaranteed/possible/impossible": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.24598511555033295}, "justified in saying": {"acc": 0.5, "acc_stderr": 0.06741998624632421, "f1": 0.34717335209946043}}, "5": {"GPT-3 style": {"acc": 0.39285714285714285, "acc_stderr": 0.0658538889806635, "f1": 0.235006435006435}, "MNLI crowdsource": {"acc": 0.35714285714285715, "acc_stderr": 0.06460957383809218, "f1": 0.24859943977591037}, "can we infer": {"acc": 0.44642857142857145, "acc_stderr": 0.067031892279424, "f1": 0.3013431013431014}, "guaranteed/possible/impossible": {"acc": 0.375, "acc_stderr": 0.06527912098338669, "f1": 0.25396825396825395}, "justified in saying": {"acc": 0.44642857142857145, "acc_stderr": 0.067031892279424, "f1": 0.295771144278607}}}, "copa": {"0": {"best_option": {"acc": 0.58, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.049604496374885836}, "cause_effect": {"acc": 0.57, "acc_norm": 0.53, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.049756985195624284}, "choose": {"acc": 0.6, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.04923659639173309}, "i_am_hesitating": {"acc": 0.61, "acc_norm": 0.54, "acc_norm_stderr": 0.05009082659620332, "acc_stderr": 0.049020713000019756}, "plausible_alternatives": {"acc": 0.59, "acc_norm": 0.54, "acc_norm_stderr": 0.05009082659620332, "acc_stderr": 0.049431107042371025}}, "1": {"best_option": {"acc": 0.57, "acc_norm": 0.54, "acc_norm_stderr": 0.05009082659620332, "acc_stderr": 0.049756985195624284}, "cause_effect": {"acc": 0.57, "acc_norm": 0.51, "acc_norm_stderr": 0.05024183937956912, "acc_stderr": 0.04975698519562428}, "choose": {"acc": 0.54, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956911, "acc_stderr": 0.05009082659620333}, "i_am_hesitating": {"acc": 0.55, "acc_norm": 0.5, "acc_norm_stderr": 0.050251890762960605, "acc_stderr": 0.05}, "plausible_alternatives": {"acc": 0.53, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956912, "acc_stderr": 0.05016135580465919}}, "2": {"best_option": {"acc": 0.56, "acc_norm": 0.51, "acc_norm_stderr": 0.05024183937956912, "acc_stderr": 0.04988876515698589}, "cause_effect": {"acc": 0.53, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.05016135580465919}, "choose": {"acc": 0.53, "acc_norm": 0.51, "acc_norm_stderr": 0.05024183937956911, "acc_stderr": 0.05016135580465919}, "i_am_hesitating": {"acc": 0.52, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956912, "acc_stderr": 0.050211673156867795}, "plausible_alternatives": {"acc": 0.49, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.05024183937956912}}, "3": {"best_option": {"acc": 0.63, "acc_norm": 0.54, "acc_norm_stderr": 0.05009082659620332, "acc_stderr": 0.04852365870939099}, "cause_effect": {"acc": 0.53, "acc_norm": 0.51, "acc_norm_stderr": 0.05024183937956911, "acc_stderr": 0.05016135580465919}, "choose": {"acc": 0.48, "acc_norm": 0.49, "acc_norm_stderr": 0.05024183937956912, "acc_stderr": 0.050211673156867795}, "i_am_hesitating": {"acc": 0.52, "acc_norm": 0.46, "acc_norm_stderr": 0.05009082659620332, "acc_stderr": 0.050211673156867795}, "plausible_alternatives": {"acc": 0.49, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.05024183937956912}}, "4": {"best_option": {"acc": 0.59, "acc_norm": 0.53, "acc_norm_stderr": 0.050161355804659205, "acc_stderr": 0.049431107042371025}, "cause_effect": {"acc": 0.52, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.050211673156867795}, "choose": {"acc": 0.5, "acc_norm": 0.45, "acc_norm_stderr": 0.04999999999999999, "acc_stderr": 0.050251890762960605}, "i_am_hesitating": {"acc": 0.5, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.050251890762960605}, "plausible_alternatives": {"acc": 0.48, "acc_norm": 0.45, "acc_norm_stderr": 0.04999999999999999, "acc_stderr": 0.050211673156867795}}, "5": {"best_option": {"acc": 0.58, "acc_norm": 0.56, "acc_norm_stderr": 0.04988876515698589, "acc_stderr": 0.049604496374885836}, "cause_effect": {"acc": 0.51, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.05024183937956911}, "choose": {"acc": 0.48, "acc_norm": 0.48, "acc_norm_stderr": 0.050211673156867795, "acc_stderr": 0.050211673156867795}, "i_am_hesitating": {"acc": 0.49, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.05024183937956912}, "plausible_alternatives": {"acc": 0.46, "acc_norm": 0.47, "acc_norm_stderr": 0.05016135580465919, "acc_stderr": 0.05009082659620332}}}, "e2e_nlg_cleaned": {"0": {"coherent_text": {"bleu": 2.7137587007909403, "bleu_stderr": 0.04289535874730102, "rouge1_fmeasure": 0.27875544176267436, "rouge1_fmeasure_stderr": 0.0030398850526226597, "rouge1_precision": 0.23420804927860228, "rouge1_precision_stderr": 0.0029343193986469574, "rouge1_recall": 0.368972333302577, "rouge1_recall_stderr": 0.0033242062731447717, "rouge2_fmeasure": 0.0995456556042845, "rouge2_fmeasure_stderr": 0.001576544642160748, "rouge2_precision": 0.08419089263107597, "rouge2_precision_stderr": 0.0014122078430193529, "rouge2_recall": 0.1301132540842083, "rouge2_recall_stderr": 0.0019763854755703455, "rougeL_fmeasure": 0.22171841937262945, "rougeL_fmeasure_stderr": 0.0019873580895336043, "rougeL_precision": 0.18337042886990745, "rougeL_precision_stderr": 0.0018758190997753684, "rougeL_recall": 0.3011863301059313, "rougeL_recall_stderr": 0.002348188787579532, "rougeLsum_fmeasure": 0.23024990094522646, "rougeLsum_fmeasure_stderr": 0.0024068830580772417, "rougeLsum_precision": 0.19284679447481895, "rougeLsum_precision_stderr": 0.0023339033454095755, "rougeLsum_recall": 0.30684388196998574, "rougeLsum_recall_stderr": 0.002638945933468923}, "create_text_for_me": {"bleu": 1.5543129195281387, "bleu_stderr": 0.053154855082351425, "rouge1_fmeasure": 0.19711096006674347, "rouge1_fmeasure_stderr": 0.003040616043574641, "rouge1_precision": 0.16696716992161004, "rouge1_precision_stderr": 0.0025805192223558792, "rouge1_recall": 0.2572825872031971, "rouge1_recall_stderr": 0.004212164493341407, "rouge2_fmeasure": 0.05566703607474666, "rouge2_fmeasure_stderr": 0.001468465064769846, "rouge2_precision": 0.04560883470771521, "rouge2_precision_stderr": 0.0012021308966618743, "rouge2_recall": 0.0769178100374361, "rouge2_recall_stderr": 0.002128049838328794, "rougeL_fmeasure": 0.13981728919468525, "rougeL_fmeasure_stderr": 0.00174661286699012, "rougeL_precision": 0.11835106119201162, "rougeL_precision_stderr": 0.0014461966600140647, "rougeL_recall": 0.1833920610587413, "rougeL_recall_stderr": 0.0026168954114977718, "rougeLsum_fmeasure": 0.1613532463046808, "rougeLsum_fmeasure_stderr": 0.0023500023254129318, "rougeLsum_precision": 0.13701764629359647, "rougeLsum_precision_stderr": 0.0019913632568671976, "rougeLsum_recall": 0.21021220612205108, "rougeLsum_recall_stderr": 0.0033048732121641264}, "generate_gramatically_correct_text": {"bleu": 3.357880740552639, "bleu_stderr": 0.05911525500616559, "rouge1_fmeasure": 0.14809698368434102, "rouge1_fmeasure_stderr": 0.0011506472021475076, "rouge1_precision": 0.15817916976766455, "rouge1_precision_stderr": 0.0011411953748066586, "rouge1_recall": 0.1449753921655427, "rouge1_recall_stderr": 0.0013358740390814108, "rouge2_fmeasure": 0.10146880175157826, "rouge2_fmeasure_stderr": 0.0009392019833054901, "rouge2_precision": 0.10846978571330382, "rouge2_precision_stderr": 0.0009492436851693913, "rouge2_recall": 0.0996008655703363, "rouge2_recall_stderr": 0.0010665340200279952, "rougeL_fmeasure": 0.14807737584120376, "rougeL_fmeasure_stderr": 0.0011493153027645868, "rougeL_precision": 0.15815833643433122, "rougeL_precision_stderr": 0.0011396842377164138, "rougeL_recall": 0.14495687364702417, "rougeL_recall_stderr": 0.0013348745641402115, "rougeLsum_fmeasure": 0.14601036905242729, "rougeLsum_fmeasure_stderr": 0.0011113659770113293, "rougeLsum_precision": 0.15601081480161114, "rougeLsum_precision_stderr": 0.001103835539038039, "rougeLsum_recall": 0.14288832426456227, "rougeLsum_recall_stderr": 0.0012946708510962023}, "generate_text_restaurant": {"bleu": 0.034167755941272124, "bleu_stderr": 0.007307628734226665, "rouge1_fmeasure": 0.1471748484883756, "rouge1_fmeasure_stderr": 0.0009958043221996186, "rouge1_precision": 0.19301807890145306, "rouge1_precision_stderr": 0.001670641964229549, "rouge1_recall": 0.1302779588596268, "rouge1_recall_stderr": 0.0009550048156890856, "rouge2_fmeasure": 0.0022741562354756397, "rouge2_fmeasure_stderr": 0.0001859749506718914, "rouge2_precision": 0.0024668841759750483, "rouge2_precision_stderr": 0.00021558605140782248, "rouge2_recall": 0.0024274910482947904, "rouge2_recall_stderr": 0.00020167632183355254, "rougeL_fmeasure": 0.12313336484112318, "rougeL_fmeasure_stderr": 0.0007638110810765412, "rougeL_precision": 0.1613842777209177, "rougeL_precision_stderr": 0.0012980656999503429, "rougeL_recall": 0.10906527422415299, "rougeL_recall_stderr": 0.0007641988419152662, "rougeLsum_fmeasure": 0.1321175572520423, "rougeLsum_fmeasure_stderr": 0.0009257711780790996, "rougeLsum_precision": 0.1737032149967068, "rougeLsum_precision_stderr": 0.0015574347646916537, "rougeLsum_recall": 0.11681049061765152, "rougeLsum_recall_stderr": 0.0008822320800065422}, "text": {"bleu": 3.1072073402263745, "bleu_stderr": 0.054847709586604974, "rouge1_fmeasure": 0.3192562818462074, "rouge1_fmeasure_stderr": 0.003103481656865381, "rouge1_precision": 0.2904834399201357, "rouge1_precision_stderr": 0.003209479431805363, "rouge1_recall": 0.378959200854369, "rouge1_recall_stderr": 0.0033573634989133632, "rouge2_fmeasure": 0.13831925575429133, "rouge2_fmeasure_stderr": 0.0014991664215535973, "rouge2_precision": 0.12493081488647073, "rouge2_precision_stderr": 0.0014706683187466658, "rouge2_recall": 0.16664906976511587, "rouge2_recall_stderr": 0.001807700709560424, "rougeL_fmeasure": 0.2530898959654513, "rougeL_fmeasure_stderr": 0.002233323932714259, "rougeL_precision": 0.22666655050825654, "rougeL_precision_stderr": 0.0021721413918810626, "rougeL_recall": 0.30700944872644476, "rougeL_recall_stderr": 0.0027337619577674933, "rougeLsum_fmeasure": 0.27039500281410955, "rougeLsum_fmeasure_stderr": 0.002525774163861117, "rougeLsum_precision": 0.24444049681203178, "rougeLsum_precision_stderr": 0.0025608900739077897, "rougeLsum_recall": 0.32402288867669254, "rougeLsum_recall_stderr": 0.0028828379788400547}}, "1": {"coherent_text": {"bleu": 4.776883166568447, "bleu_stderr": 0.06188581593802797, "rouge1_fmeasure": 0.2834915331439006, "rouge1_fmeasure_stderr": 0.00286004072375514, "rouge1_precision": 0.32376895607264333, "rouge1_precision_stderr": 0.003559481233250432, "rouge1_recall": 0.2766644901218466, "rouge1_recall_stderr": 0.003006971082101755, "rouge2_fmeasure": 0.09798530798331949, "rouge2_fmeasure_stderr": 0.001591303023518874, "rouge2_precision": 0.11422580842443067, "rouge2_precision_stderr": 0.001981771261667616, "rouge2_recall": 0.09502385216159488, "rouge2_recall_stderr": 0.0016087538033057863, "rougeL_fmeasure": 0.20604394935503653, "rougeL_fmeasure_stderr": 0.0021945968399171094, "rougeL_precision": 0.23647808916387403, "rougeL_precision_stderr": 0.002785237759844657, "rougeL_recall": 0.20111037531455733, "rougeL_recall_stderr": 0.0023137520458429714, "rougeLsum_fmeasure": 0.23271534495205093, "rougeLsum_fmeasure_stderr": 0.002496962503511146, "rougeLsum_precision": 0.26668028425156176, "rougeLsum_precision_stderr": 0.0031249451907287393, "rougeLsum_recall": 0.22678921457291987, "rougeLsum_recall_stderr": 0.0026010547211686662}, "create_text_for_me": {"bleu": 4.297512923603484, "bleu_stderr": 0.07413629268869817, "rouge1_fmeasure": 0.26520071914836685, "rouge1_fmeasure_stderr": 0.003003658166661617, "rouge1_precision": 0.3020908787512572, "rouge1_precision_stderr": 0.0036921538937666133, "rouge1_recall": 0.25987043503346535, "rouge1_recall_stderr": 0.003141567879746835, "rouge2_fmeasure": 0.08973585628096593, "rouge2_fmeasure_stderr": 0.0015772173702039572, "rouge2_precision": 0.10465034015071716, "rouge2_precision_stderr": 0.0019691347076890445, "rouge2_recall": 0.08719562064392963, "rouge2_recall_stderr": 0.0015853267824143741, "rougeL_fmeasure": 0.1923500412945955, "rougeL_fmeasure_stderr": 0.002278192833793113, "rougeL_precision": 0.2204806269646829, "rougeL_precision_stderr": 0.0028685459881547795, "rougeL_recall": 0.18825101434769329, "rougeL_recall_stderr": 0.002377652096200304, "rougeLsum_fmeasure": 0.21805867415782276, "rougeLsum_fmeasure_stderr": 0.002597589024739917, "rougeLsum_precision": 0.24946111667640178, "rougeLsum_precision_stderr": 0.0032258053795631643, "rougeLsum_recall": 0.2131990405760118, "rougeLsum_recall_stderr": 0.0026891365363325335}, "generate_gramatically_correct_text": {"bleu": 4.2427683279460675, "bleu_stderr": 0.06477494709625758, "rouge1_fmeasure": 0.21181656379511446, "rouge1_fmeasure_stderr": 0.0020085926283200336, "rouge1_precision": 0.21918225259331264, "rouge1_precision_stderr": 0.00210014169544477, "rouge1_recall": 0.22082556498109132, "rouge1_recall_stderr": 0.0024742493122335047, "rouge2_fmeasure": 0.11107876311211701, "rouge2_fmeasure_stderr": 0.001083915673794437, "rouge2_precision": 0.1163871008313897, "rouge2_precision_stderr": 0.001129914509168705, "rouge2_recall": 0.11332262657648863, "rouge2_recall_stderr": 0.0012724108998245058, "rougeL_fmeasure": 0.18333822193791077, "rougeL_fmeasure_stderr": 0.0013426559404332522, "rougeL_precision": 0.19106349917495133, "rougeL_precision_stderr": 0.0014505997962077457, "rougeL_recall": 0.1894466427618812, "rougeL_recall_stderr": 0.0017589499235787709, "rougeLsum_fmeasure": 0.19201664569930915, "rougeLsum_fmeasure_stderr": 0.0016232028917454438, "rougeLsum_precision": 0.19958112129901742, "rougeLsum_precision_stderr": 0.0017237243835404959, "rougeLsum_recall": 0.1990541967175752, "rougeLsum_recall_stderr": 0.002038778096964972}, "generate_text_restaurant": {"bleu": 6.480953332643209, "bleu_stderr": 0.1333798473522777, "rouge1_fmeasure": 0.34215503945412495, "rouge1_fmeasure_stderr": 0.002167322468153194, "rouge1_precision": 0.40802697478059297, "rouge1_precision_stderr": 0.0029673602294223223, "rouge1_recall": 0.3261039583636633, "rouge1_recall_stderr": 0.0025124189387771036, "rouge2_fmeasure": 0.12715929154890004, "rouge2_fmeasure_stderr": 0.0016365489693603613, "rouge2_precision": 0.153945929557076, "rouge2_precision_stderr": 0.00209119791686883, "rouge2_recall": 0.12079622988004886, "rouge2_recall_stderr": 0.0016791741735485287, "rougeL_fmeasure": 0.25061818922473783, "rougeL_fmeasure_stderr": 0.0017641406764575026, "rougeL_precision": 0.3007493740437511, "rougeL_precision_stderr": 0.002487305156158438, "rougeL_recall": 0.23850764812452344, "rougeL_recall_stderr": 0.0019836094262200904, "rougeLsum_fmeasure": 0.28167871848928877, "rougeLsum_fmeasure_stderr": 0.0020153231718161705, "rougeLsum_precision": 0.3365871813932031, "rougeLsum_precision_stderr": 0.002735631481473113, "rougeLsum_recall": 0.26820407020047227, "rougeLsum_recall_stderr": 0.002246895665921754}, "text": {"bleu": 5.768904743696091, "bleu_stderr": 0.07694130893367676, "rouge1_fmeasure": 0.32587000405265004, "rouge1_fmeasure_stderr": 0.002055881315827058, "rouge1_precision": 0.38598841646744675, "rouge1_precision_stderr": 0.0029352063511176927, "rouge1_recall": 0.3120167715681631, "rouge1_recall_stderr": 0.002282231208925737, "rouge2_fmeasure": 0.11624053272466801, "rouge2_fmeasure_stderr": 0.001498193127795664, "rouge2_precision": 0.141089766958126, "rouge2_precision_stderr": 0.0019905359417947705, "rouge2_recall": 0.1103023596204418, "rouge2_recall_stderr": 0.0014920441369873467, "rougeL_fmeasure": 0.24041537230720197, "rougeL_fmeasure_stderr": 0.001681675906589535, "rougeL_precision": 0.28626570995375356, "rougeL_precision_stderr": 0.0024472272782447135, "rougeL_recall": 0.23012511993056003, "rougeL_recall_stderr": 0.0018339215891965562, "rougeLsum_fmeasure": 0.2683840002761712, "rougeLsum_fmeasure_stderr": 0.0019114719552552634, "rougeLsum_precision": 0.31879426513272485, "rougeLsum_precision_stderr": 0.002698431810379339, "rougeLsum_recall": 0.25680714373294367, "rougeLsum_recall_stderr": 0.002060514880618308}}, "2": {"coherent_text": {"bleu": 6.367232170877392, "bleu_stderr": 0.08549395921417903, "rouge1_fmeasure": 0.3289239334300976, "rouge1_fmeasure_stderr": 0.002499918188495706, "rouge1_precision": 0.3773758975465212, "rouge1_precision_stderr": 0.0033199301919147046, "rouge1_recall": 0.32188781676797334, "rouge1_recall_stderr": 0.002721512158082673, "rouge2_fmeasure": 0.12398725975487611, "rouge2_fmeasure_stderr": 0.0016074904321220344, "rouge2_precision": 0.14546628155822858, "rouge2_precision_stderr": 0.002076024275324282, "rouge2_recall": 0.12068762525775042, "rouge2_recall_stderr": 0.0016561296699671543, "rougeL_fmeasure": 0.24355724501015555, "rougeL_fmeasure_stderr": 0.0019784432789600953, "rougeL_precision": 0.2810377453951778, "rougeL_precision_stderr": 0.0027020897811990126, "rougeL_recall": 0.23833759611575786, "rougeL_recall_stderr": 0.0021523119739722788, "rougeLsum_fmeasure": 0.2728731326289227, "rougeLsum_fmeasure_stderr": 0.002250126526299403, "rougeLsum_precision": 0.3138640600673944, "rougeLsum_precision_stderr": 0.0029811254616978994, "rougeLsum_recall": 0.26690385060781147, "rougeLsum_recall_stderr": 0.0024253237610957243}, "create_text_for_me": {"bleu": 5.867162895941705, "bleu_stderr": 0.08645931541468421, "rouge1_fmeasure": 0.31591363153481733, "rouge1_fmeasure_stderr": 0.002562871100672284, "rouge1_precision": 0.36350270495948706, "rouge1_precision_stderr": 0.003355398168023335, "rouge1_recall": 0.30887853999055825, "rouge1_recall_stderr": 0.002783892987479659, "rouge2_fmeasure": 0.11659264308512794, "rouge2_fmeasure_stderr": 0.0015973372961016513, "rouge2_precision": 0.13698348670528768, "rouge2_precision_stderr": 0.0020426287273337175, "rouge2_recall": 0.1136012858172398, "rouge2_recall_stderr": 0.0016553465039351092, "rougeL_fmeasure": 0.2340979483687512, "rougeL_fmeasure_stderr": 0.002014832984376413, "rougeL_precision": 0.27091169751519845, "rougeL_precision_stderr": 0.0027121576953348142, "rougeL_recall": 0.2289429812438254, "rougeL_recall_stderr": 0.0021934381495222347, "rougeLsum_fmeasure": 0.26219793663870944, "rougeLsum_fmeasure_stderr": 0.0022935285784009335, "rougeLsum_precision": 0.30254398731438875, "rougeLsum_precision_stderr": 0.003008055599147345, "rougeLsum_recall": 0.256276398824361, "rougeLsum_recall_stderr": 0.0024731203037475064}, "generate_gramatically_correct_text": {"bleu": 5.051230966031554, "bleu_stderr": 0.13591434741266697, "rouge1_fmeasure": 0.24111306272957858, "rouge1_fmeasure_stderr": 0.0024738591515231123, "rouge1_precision": 0.257733606387396, "rouge1_precision_stderr": 0.002854189812923714, "rouge1_recall": 0.2484296444250448, "rouge1_recall_stderr": 0.0028744090194746633, "rouge2_fmeasure": 0.11330107614177073, "rouge2_fmeasure_stderr": 0.0013577176314569378, "rouge2_precision": 0.12122028894875721, "rouge2_precision_stderr": 0.001566218681260604, "rouge2_recall": 0.11563987732519745, "rouge2_recall_stderr": 0.0015422732429049785, "rougeL_fmeasure": 0.19848666892850747, "rougeL_fmeasure_stderr": 0.0016655731215332195, "rougeL_precision": 0.21371701405030952, "rougeL_precision_stderr": 0.0020806238798557787, "rougeL_recall": 0.20335467777797223, "rougeL_recall_stderr": 0.0020284517357400562, "rougeLsum_fmeasure": 0.21119700236219294, "rougeLsum_fmeasure_stderr": 0.002040241369573437, "rougeLsum_precision": 0.22672124933887802, "rougeLsum_precision_stderr": 0.002428427353019781, "rougeLsum_recall": 0.21696666131837813, "rougeLsum_recall_stderr": 0.002404135899742133}, "generate_text_restaurant": {"bleu": 7.666105297947792, "bleu_stderr": 0.13136205041430496, "rouge1_fmeasure": 0.3560820001983814, "rouge1_fmeasure_stderr": 0.0021795777882845308, "rouge1_precision": 0.4208785617206442, "rouge1_precision_stderr": 0.0030817416724062204, "rouge1_recall": 0.3424204393428585, "rouge1_recall_stderr": 0.0025248373231055543, "rouge2_fmeasure": 0.14362551846595048, "rouge2_fmeasure_stderr": 0.001639558679939512, "rouge2_precision": 0.17233048865607528, "rouge2_precision_stderr": 0.002143190552622057, "rouge2_recall": 0.1383316167107616, "rouge2_recall_stderr": 0.0017503067979860562, "rougeL_fmeasure": 0.2663812276115756, "rougeL_fmeasure_stderr": 0.0018062222315789584, "rougeL_precision": 0.31589969893422243, "rougeL_precision_stderr": 0.002592218877775354, "rougeL_recall": 0.2566141330684389, "rougeL_recall_stderr": 0.0020689228041648397, "rougeLsum_fmeasure": 0.2965891442412763, "rougeLsum_fmeasure_stderr": 0.0020592217464907307, "rougeLsum_precision": 0.3509539248702352, "rougeLsum_precision_stderr": 0.0028422730753440103, "rougeLsum_recall": 0.2852154582143963, "rougeLsum_recall_stderr": 0.0023210148627271433}, "text": {"bleu": 6.623187234910589, "bleu_stderr": 0.09284748268311233, "rouge1_fmeasure": 0.336751496891913, "rouge1_fmeasure_stderr": 0.002100952186916202, "rouge1_precision": 0.40593215484811684, "rouge1_precision_stderr": 0.003106340108516954, "rouge1_recall": 0.32006154223100713, "rouge1_recall_stderr": 0.002324542554838695, "rouge2_fmeasure": 0.12917141922678907, "rouge2_fmeasure_stderr": 0.001569747986831894, "rouge2_precision": 0.15978879415281969, "rouge2_precision_stderr": 0.0021982896081617956, "rouge2_recall": 0.12215543191803664, "rouge2_recall_stderr": 0.0015773240400394131, "rougeL_fmeasure": 0.2561811353185624, "rougeL_fmeasure_stderr": 0.0017410449710564626, "rougeL_precision": 0.3108908870060753, "rougeL_precision_stderr": 0.002663105799005185, "rougeL_recall": 0.24343947890677778, "rougeL_recall_stderr": 0.0018898553924389825, "rougeLsum_fmeasure": 0.28203219973113697, "rougeLsum_fmeasure_stderr": 0.0019575989714408007, "rougeLsum_precision": 0.34104283897670223, "rougeLsum_precision_stderr": 0.0028793434150954354, "rougeLsum_recall": 0.2679333081128248, "rougeLsum_recall_stderr": 0.002109219796922622}}, "3": {"coherent_text": {"bleu": 7.254477332274835, "bleu_stderr": 0.11215533468282095, "rouge1_fmeasure": 0.3494166954536371, "rouge1_fmeasure_stderr": 0.0022556130694930294, "rouge1_precision": 0.4071643250852113, "rouge1_precision_stderr": 0.0030918195348976214, "rouge1_recall": 0.3384520664356995, "rouge1_recall_stderr": 0.0025658424963617885, "rouge2_fmeasure": 0.1372251409323741, "rouge2_fmeasure_stderr": 0.0016327922809273173, "rouge2_precision": 0.16367687467995234, "rouge2_precision_stderr": 0.0021599397295835646, "rouge2_recall": 0.1321193097997345, "rouge2_recall_stderr": 0.001688691833299027, "rougeL_fmeasure": 0.2616315622148955, "rougeL_fmeasure_stderr": 0.0018615767022711104, "rougeL_precision": 0.30714914534792875, "rougeL_precision_stderr": 0.002654116948033462, "rougeL_recall": 0.2531238455203022, "rougeL_recall_stderr": 0.002079981518486239, "rougeLsum_fmeasure": 0.29126143372342, "rougeLsum_fmeasure_stderr": 0.0020762541949325723, "rougeLsum_precision": 0.34078206920893017, "rougeLsum_precision_stderr": 0.002868278542304959, "rougeLsum_recall": 0.2818084570295315, "rougeLsum_recall_stderr": 0.0023069075160404608}, "create_text_for_me": {"bleu": 6.900082747494956, "bleu_stderr": 0.11320138722526062, "rouge1_fmeasure": 0.3408847326310388, "rouge1_fmeasure_stderr": 0.002233633388817911, "rouge1_precision": 0.40185459499224246, "rouge1_precision_stderr": 0.0031344547603992636, "rouge1_recall": 0.3286363837625974, "rouge1_recall_stderr": 0.002532026641586088, "rouge2_fmeasure": 0.13197675444985899, "rouge2_fmeasure_stderr": 0.001607348415179031, "rouge2_precision": 0.16009806566984877, "rouge2_precision_stderr": 0.0021974508251131177, "rouge2_recall": 0.12657236948027373, "rouge2_recall_stderr": 0.0016617454893539316, "rougeL_fmeasure": 0.2567441918347381, "rougeL_fmeasure_stderr": 0.0018394022581844677, "rougeL_precision": 0.3052474939342758, "rougeL_precision_stderr": 0.0027064485265094005, "rougeL_recall": 0.2472552075503705, "rougeL_recall_stderr": 0.0020502376164631794, "rougeLsum_fmeasure": 0.2849098056411087, "rougeLsum_fmeasure_stderr": 0.0020490356453933285, "rougeLsum_precision": 0.3376547544199217, "rougeLsum_precision_stderr": 0.002925952642220336, "rougeLsum_recall": 0.27426355732116464, "rougeLsum_recall_stderr": 0.002262112871842013}, "generate_gramatically_correct_text": {"bleu": 5.994293739144544, "bleu_stderr": 0.13601894534552475, "rouge1_fmeasure": 0.2629515004088073, "rouge1_fmeasure_stderr": 0.002731946803977037, "rouge1_precision": 0.2862040020667472, "rouge1_precision_stderr": 0.003238321483446096, "rouge1_recall": 0.26830082610544254, "rouge1_recall_stderr": 0.003078316721189308, "rouge2_fmeasure": 0.11772014214137563, "rouge2_fmeasure_stderr": 0.0016007488021205615, "rouge2_precision": 0.12802276569983448, "rouge2_precision_stderr": 0.0018589047775403413, "rouge2_recall": 0.11943444822480005, "rouge2_recall_stderr": 0.0017609621427282169, "rougeL_fmeasure": 0.2105240941617669, "rougeL_fmeasure_stderr": 0.0019150789004942635, "rougeL_precision": 0.23037063366617458, "rougeL_precision_stderr": 0.002402866496678994, "rougeL_recall": 0.21411130784418406, "rougeL_recall_stderr": 0.002232311335363655, "rougeLsum_fmeasure": 0.22458434070337602, "rougeLsum_fmeasure_stderr": 0.00230510570165974, "rougeLsum_precision": 0.24519320098691047, "rougeLsum_precision_stderr": 0.0027928419904608157, "rougeLsum_recall": 0.22888580022427765, "rougeLsum_recall_stderr": 0.002615628628359519}, "generate_text_restaurant": {"bleu": 8.56058254286114, "bleu_stderr": 0.15123807567499506, "rouge1_fmeasure": 0.36308270867669845, "rouge1_fmeasure_stderr": 0.002191649011082704, "rouge1_precision": 0.43327921520363066, "rouge1_precision_stderr": 0.003097568614374566, "rouge1_recall": 0.34769690369124, "rouge1_recall_stderr": 0.002543918652030485, "rouge2_fmeasure": 0.155349432200804, "rouge2_fmeasure_stderr": 0.0016980107119711429, "rouge2_precision": 0.18895843732454584, "rouge2_precision_stderr": 0.002286888650673502, "rouge2_recall": 0.14872848902577038, "rouge2_recall_stderr": 0.0018036895683928664, "rougeL_fmeasure": 0.27893233629167613, "rougeL_fmeasure_stderr": 0.0018361143289552099, "rougeL_precision": 0.33501338216238535, "rougeL_precision_stderr": 0.0027058006091930545, "rougeL_recall": 0.267043780702506, "rougeL_recall_stderr": 0.002094068308249151, "rougeLsum_fmeasure": 0.30795704419792747, "rougeLsum_fmeasure_stderr": 0.0020858121305838396, "rougeLsum_precision": 0.3685423655496754, "rougeLsum_precision_stderr": 0.002930111403674498, "rougeLsum_recall": 0.29466685169867624, "rougeLsum_recall_stderr": 0.002347617054544211}, "text": {"bleu": 7.009840500917634, "bleu_stderr": 0.16312381322545783, "rouge1_fmeasure": 0.3430320576782421, "rouge1_fmeasure_stderr": 0.0020936920124836156, "rouge1_precision": 0.42255037024132486, "rouge1_precision_stderr": 0.0031250780400183006, "rouge1_recall": 0.3217085619415695, "rouge1_recall_stderr": 0.0023676195043913673, "rouge2_fmeasure": 0.13699349439736094, "rouge2_fmeasure_stderr": 0.0016218763137671342, "rouge2_precision": 0.17364077254123408, "rouge2_precision_stderr": 0.002316697656460501, "rouge2_recall": 0.12769222375851083, "rouge2_recall_stderr": 0.0016388892071143308, "rougeL_fmeasure": 0.2647497003936544, "rougeL_fmeasure_stderr": 0.0017804230943033162, "rougeL_precision": 0.3285519495441955, "rougeL_precision_stderr": 0.002763683063352353, "rougeL_recall": 0.2480739442674065, "rougeL_recall_stderr": 0.0019648687514971755, "rougeLsum_fmeasure": 0.28884517699797746, "rougeLsum_fmeasure_stderr": 0.0019739366315454183, "rougeLsum_precision": 0.35718768502576903, "rougeLsum_precision_stderr": 0.00295212627343816, "rougeLsum_recall": 0.2706627854942478, "rougeLsum_recall_stderr": 0.0021634238449362055}}, "4": {"coherent_text": {"bleu": 7.513333905948323, "bleu_stderr": 0.14766226863484086, "rouge1_fmeasure": 0.3555881239462119, "rouge1_fmeasure_stderr": 0.0021502692030659297, "rouge1_precision": 0.41870474769317223, "rouge1_precision_stderr": 0.0030519614549617937, "rouge1_recall": 0.3420315436967001, "rouge1_recall_stderr": 0.0024879203984106034, "rouge2_fmeasure": 0.14162675325545648, "rouge2_fmeasure_stderr": 0.0016441659153520706, "rouge2_precision": 0.17051356574406315, "rouge2_precision_stderr": 0.002206652366301817, "rouge2_recall": 0.1358976585047218, "rouge2_recall_stderr": 0.0017083541046320008, "rougeL_fmeasure": 0.2677207737523752, "rougeL_fmeasure_stderr": 0.0017913031037284717, "rougeL_precision": 0.3174787632627764, "rougeL_precision_stderr": 0.0026395803173011943, "rougeL_recall": 0.2572488981524024, "rougeL_recall_stderr": 0.002020155375464557, "rougeLsum_fmeasure": 0.29745641696384, "rougeLsum_fmeasure_stderr": 0.0020148180914382, "rougeLsum_precision": 0.3514161137133654, "rougeLsum_precision_stderr": 0.0028576511858666056, "rougeLsum_recall": 0.2859476044645928, "rougeLsum_recall_stderr": 0.0022615077541335332}, "create_text_for_me": {"bleu": 7.320031747652725, "bleu_stderr": 0.12923534694654085, "rouge1_fmeasure": 0.34908186170828154, "rouge1_fmeasure_stderr": 0.002179669059327166, "rouge1_precision": 0.41515066909927334, "rouge1_precision_stderr": 0.0031037936370394118, "rouge1_recall": 0.33321944407915566, "rouge1_recall_stderr": 0.00246664110665828, "rouge2_fmeasure": 0.13929100917815865, "rouge2_fmeasure_stderr": 0.001648417738388695, "rouge2_precision": 0.17014937408764044, "rouge2_precision_stderr": 0.0022566453563309186, "rouge2_recall": 0.13226023424874878, "rouge2_recall_stderr": 0.0016784568847184593, "rougeL_fmeasure": 0.26448509530378633, "rougeL_fmeasure_stderr": 0.0018168804060454762, "rougeL_precision": 0.3170205721100898, "rougeL_precision_stderr": 0.002708866680963841, "rougeL_recall": 0.25219110868882494, "rougeL_recall_stderr": 0.0020045842681778495, "rougeLsum_fmeasure": 0.2927886602264322, "rougeLsum_fmeasure_stderr": 0.0020229110972793574, "rougeLsum_precision": 0.34988097982765304, "rougeLsum_precision_stderr": 0.0029179543208130904, "rougeLsum_recall": 0.27907105999460174, "rougeLsum_recall_stderr": 0.002218854839510192}, "generate_gramatically_correct_text": {"bleu": 6.295897046683775, "bleu_stderr": 0.11584821440017962, "rouge1_fmeasure": 0.2770259703392387, "rouge1_fmeasure_stderr": 0.00277806824406267, "rouge1_precision": 0.3075285202414195, "rouge1_precision_stderr": 0.0033945393939165606, "rouge1_recall": 0.2801004454252903, "rouge1_recall_stderr": 0.0031191985284490546, "rouge2_fmeasure": 0.1204704109621881, "rouge2_fmeasure_stderr": 0.0016880105990733003, "rouge2_precision": 0.13304703967240197, "rouge2_precision_stderr": 0.0020060838809736344, "rouge2_recall": 0.12206189669119812, "rouge2_recall_stderr": 0.0018573319105401854, "rougeL_fmeasure": 0.21909517879981463, "rougeL_fmeasure_stderr": 0.0019961120480864922, "rougeL_precision": 0.244640019205779, "rougeL_precision_stderr": 0.0025913691710492746, "rougeL_recall": 0.2210765996441547, "rougeL_recall_stderr": 0.002310185867597945, "rougeLsum_fmeasure": 0.23521486729488383, "rougeLsum_fmeasure_stderr": 0.002390395310603108, "rougeLsum_precision": 0.26210204661833736, "rougeLsum_precision_stderr": 0.0029901819656978864, "rougeLsum_recall": 0.23756462084158286, "rougeLsum_recall_stderr": 0.002680022085644319}, "generate_text_restaurant": {"bleu": 8.949311991420602, "bleu_stderr": 0.15855472500531886, "rouge1_fmeasure": 0.3640451950801677, "rouge1_fmeasure_stderr": 0.0022138110432792396, "rouge1_precision": 0.4375804506933646, "rouge1_precision_stderr": 0.0032055526063119576, "rouge1_recall": 0.3472946902285877, "rouge1_recall_stderr": 0.002539437414200631, "rouge2_fmeasure": 0.15872608958193554, "rouge2_fmeasure_stderr": 0.0017627421907090207, "rouge2_precision": 0.19419957071290792, "rouge2_precision_stderr": 0.002374016494731067, "rouge2_recall": 0.15145528896504676, "rouge2_recall_stderr": 0.001854581130642659, "rougeL_fmeasure": 0.2812482066830368, "rougeL_fmeasure_stderr": 0.0018755893236809696, "rougeL_precision": 0.3396411151186514, "rougeL_precision_stderr": 0.0027863819229066904, "rougeL_recall": 0.26869178202876687, "rougeL_recall_stderr": 0.0021284030057315906, "rougeLsum_fmeasure": 0.31039320669801307, "rougeLsum_fmeasure_stderr": 0.002134177354316102, "rougeLsum_precision": 0.37367146023180203, "rougeLsum_precision_stderr": 0.0030307585676874636, "rougeLsum_recall": 0.29617959760674445, "rougeLsum_recall_stderr": 0.0023937351643062293}, "text": {"bleu": 7.107067863801302, "bleu_stderr": 0.16706105397816348, "rouge1_fmeasure": 0.3436390706497596, "rouge1_fmeasure_stderr": 0.002120851612678163, "rouge1_precision": 0.42460043468036757, "rouge1_precision_stderr": 0.0031591950135965727, "rouge1_recall": 0.3214789344478919, "rouge1_recall_stderr": 0.002360243252337688, "rouge2_fmeasure": 0.1374084834705204, "rouge2_fmeasure_stderr": 0.0016581364245938277, "rouge2_precision": 0.17482332505179643, "rouge2_precision_stderr": 0.002374641561754068, "rouge2_recall": 0.12745182984114253, "rouge2_recall_stderr": 0.0016392080775069744, "rougeL_fmeasure": 0.2647579237213868, "rougeL_fmeasure_stderr": 0.0017899579331400889, "rougeL_precision": 0.32947339330122327, "rougeL_precision_stderr": 0.00275863160551055, "rougeL_recall": 0.24728616508687778, "rougeL_recall_stderr": 0.0019282863931353905, "rougeLsum_fmeasure": 0.2897245093598933, "rougeLsum_fmeasure_stderr": 0.001995382121885309, "rougeLsum_precision": 0.3590563916220248, "rougeLsum_precision_stderr": 0.0029587623344615264, "rougeLsum_recall": 0.2708954741926685, "rougeLsum_recall_stderr": 0.002158968315143282}}, "5": {"coherent_text": {"bleu": 7.595896684409467, "bleu_stderr": 0.11394951713016016, "rouge1_fmeasure": 0.35658924039746537, "rouge1_fmeasure_stderr": 0.002154416981057853, "rouge1_precision": 0.4267054087992628, "rouge1_precision_stderr": 0.003104273111695704, "rouge1_recall": 0.33916614278424423, "rouge1_recall_stderr": 0.002448118122778936, "rouge2_fmeasure": 0.14450895819387546, "rouge2_fmeasure_stderr": 0.0016668350972141967, "rouge2_precision": 0.17671699676015093, "rouge2_precision_stderr": 0.0022460226448767994, "rouge2_recall": 0.13693271552714417, "rouge2_recall_stderr": 0.0017033794821993003, "rougeL_fmeasure": 0.27045280819438955, "rougeL_fmeasure_stderr": 0.0018187716322355006, "rougeL_precision": 0.3255781091740331, "rougeL_precision_stderr": 0.002685842607303053, "rougeL_recall": 0.2571008116825621, "rougeL_recall_stderr": 0.0020239439722082807, "rougeLsum_fmeasure": 0.29917487251322744, "rougeLsum_fmeasure_stderr": 0.002029876079328482, "rougeLsum_precision": 0.35879686272429967, "rougeLsum_precision_stderr": 0.002902004869955475, "rougeLsum_recall": 0.2844216790439025, "rougeLsum_recall_stderr": 0.0022327718696866835}, "create_text_for_me": {"bleu": 7.579729216459941, "bleu_stderr": 0.10379442969910549, "rouge1_fmeasure": 0.3538095251081614, "rouge1_fmeasure_stderr": 0.0021545871849702673, "rouge1_precision": 0.4245878302927168, "rouge1_precision_stderr": 0.0030985552342163206, "rouge1_recall": 0.3351370843643141, "rouge1_recall_stderr": 0.0024378280020133556, "rouge2_fmeasure": 0.14278771616071478, "rouge2_fmeasure_stderr": 0.0016729015640305864, "rouge2_precision": 0.17516980829315892, "rouge2_precision_stderr": 0.0022649531690162985, "rouge2_recall": 0.13470386962627315, "rouge2_recall_stderr": 0.001692225627937733, "rougeL_fmeasure": 0.26921100421071603, "rougeL_fmeasure_stderr": 0.0018239111737229203, "rougeL_precision": 0.32504959215644774, "rougeL_precision_stderr": 0.002692604155146814, "rougeL_recall": 0.2547905097073822, "rougeL_recall_stderr": 0.002009386966136313, "rougeLsum_fmeasure": 0.2975768458732384, "rougeLsum_fmeasure_stderr": 0.002040211020017694, "rougeLsum_precision": 0.3579093459662674, "rougeLsum_precision_stderr": 0.0029111349234790906, "rougeLsum_recall": 0.2817445669200069, "rougeLsum_recall_stderr": 0.002227797407847849}, "generate_gramatically_correct_text": {"bleu": 6.703087166294118, "bleu_stderr": 0.13595717497233803, "rouge1_fmeasure": 0.29157203264366427, "rouge1_fmeasure_stderr": 0.0028155194363036675, "rouge1_precision": 0.32943509738961013, "rouge1_precision_stderr": 0.0035748420223814196, "rouge1_recall": 0.2922771657368818, "rouge1_recall_stderr": 0.0031239846598688573, "rouge2_fmeasure": 0.12548313742099432, "rouge2_fmeasure_stderr": 0.0017504707119138294, "rouge2_precision": 0.14104223605836952, "rouge2_precision_stderr": 0.00213159571945594, "rouge2_recall": 0.12618593841822234, "rouge2_recall_stderr": 0.0018876455590173392, "rougeL_fmeasure": 0.22747557648943484, "rougeL_fmeasure_stderr": 0.002019174394965523, "rougeL_precision": 0.2584532209858507, "rougeL_precision_stderr": 0.002742447042465613, "rougeL_recall": 0.22794142525227126, "rougeL_recall_stderr": 0.0023233899066376004, "rougeLsum_fmeasure": 0.24501087162389928, "rougeLsum_fmeasure_stderr": 0.002431781683361551, "rougeLsum_precision": 0.2777892174460894, "rougeLsum_precision_stderr": 0.0031494031496789106, "rougeLsum_recall": 0.24548258109140436, "rougeLsum_recall_stderr": 0.002697258051680226}, "generate_text_restaurant": {"bleu": 9.131873104913453, "bleu_stderr": 0.1609494007900667, "rouge1_fmeasure": 0.3666709721169093, "rouge1_fmeasure_stderr": 0.0021727553535898286, "rouge1_precision": 0.4394755159466518, "rouge1_precision_stderr": 0.0032068250726382916, "rouge1_recall": 0.35023402614950333, "rouge1_recall_stderr": 0.0025006999002648547, "rouge2_fmeasure": 0.16117969333074014, "rouge2_fmeasure_stderr": 0.0017520909045254573, "rouge2_precision": 0.19707980839947012, "rouge2_precision_stderr": 0.0023944009918063663, "rouge2_recall": 0.15380875597263907, "rouge2_recall_stderr": 0.0018326439588638127, "rougeL_fmeasure": 0.2867364556172849, "rougeL_fmeasure_stderr": 0.0018811550503636599, "rougeL_precision": 0.3455815272270704, "rougeL_precision_stderr": 0.0028520716467400477, "rougeL_recall": 0.2740572130821665, "rougeL_recall_stderr": 0.0021232292917631344, "rougeLsum_fmeasure": 0.3144340022223061, "rougeLsum_fmeasure_stderr": 0.002098469397474867, "rougeLsum_precision": 0.3778806601818275, "rougeLsum_precision_stderr": 0.0030588224104805, "rougeLsum_recall": 0.3002582190319247, "rougeLsum_recall_stderr": 0.0023482445311229245}, "text": {"bleu": 7.274383164727865, "bleu_stderr": 0.11122428262789567, "rouge1_fmeasure": 0.3469575519675918, "rouge1_fmeasure_stderr": 0.0021034752646403756, "rouge1_precision": 0.43124377131693664, "rouge1_precision_stderr": 0.003211030428742271, "rouge1_recall": 0.3239350199889235, "rouge1_recall_stderr": 0.002364971988949829, "rouge2_fmeasure": 0.13963971276485945, "rouge2_fmeasure_stderr": 0.0016673499985951031, "rouge2_precision": 0.17902835655762828, "rouge2_precision_stderr": 0.002400473530717413, "rouge2_recall": 0.1292480377318174, "rouge2_recall_stderr": 0.0016477268883042123, "rougeL_fmeasure": 0.26834384059308486, "rougeL_fmeasure_stderr": 0.0018122635157761808, "rougeL_precision": 0.3361429661493787, "rougeL_precision_stderr": 0.002859024482019008, "rougeL_recall": 0.250039081212901, "rougeL_recall_stderr": 0.0019599039144951795, "rougeLsum_fmeasure": 0.29352695616349106, "rougeLsum_fmeasure_stderr": 0.0019956203711713662, "rougeLsum_precision": 0.36599409686858575, "rougeLsum_precision_stderr": 0.003031621767071787, "rougeLsum_recall": 0.27374374119186806, "rougeLsum_recall_stderr": 0.0021570988272421876}}}, "gem_xsum": {"0": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.6380986430759009, "bleu_stderr": 0.09497300417280843, "rouge1_fmeasure": 0.12154953382545712, "rouge1_fmeasure_stderr": 0.0019861476396373216, "rouge1_precision": 0.08690252895592265, "rouge1_precision_stderr": 0.0014696237802313788, "rouge1_recall": 0.21076760136855094, "rouge1_recall_stderr": 0.0033990058849462626, "rouge2_fmeasure": 0.01678428011344468, "rouge2_fmeasure_stderr": 0.0009370052903803109, "rouge2_precision": 0.011835058486175484, "rouge2_precision_stderr": 0.0006782998894859348, "rouge2_recall": 0.03014924646277289, "rouge2_recall_stderr": 0.0016485838218151638, "rougeL_fmeasure": 0.1070578662991953, "rougeL_fmeasure_stderr": 0.0016219964231111727, "rougeL_precision": 0.07649508725038684, "rougeL_precision_stderr": 0.0012036928723305662, "rougeL_recall": 0.18622709296396378, "rougeL_recall_stderr": 0.0028294388338103195, "rougeLsum_fmeasure": 0.0973663233955277, "rougeLsum_fmeasure_stderr": 0.0016371882205093247, "rougeLsum_precision": 0.06936681100471268, "rougeLsum_precision_stderr": 0.0011946703655621098, "rougeLsum_recall": 0.17032808205023134, "rougeLsum_recall_stderr": 0.002899791797120825}, "DOC_tldr": {"bleu": 0.8587282218390164, "bleu_stderr": 0.1092988209328534, "rouge1_fmeasure": 0.1353054364687284, "rouge1_fmeasure_stderr": 0.002260494132460735, "rouge1_precision": 0.09636462071540756, "rouge1_precision_stderr": 0.0016678733192933997, "rouge1_recall": 0.23676664119512633, "rouge1_recall_stderr": 0.00385324499120598, "rouge2_fmeasure": 0.02280597280652988, "rouge2_fmeasure_stderr": 0.0010663990484613007, "rouge2_precision": 0.01612079371257744, "rouge2_precision_stderr": 0.0007668663392734058, "rouge2_recall": 0.0407993478012527, "rouge2_recall_stderr": 0.0018811374457448548, "rougeL_fmeasure": 0.11660002475056051, "rougeL_fmeasure_stderr": 0.0017579801450236503, "rougeL_precision": 0.0829190298668844, "rougeL_precision_stderr": 0.0012981572345846506, "rougeL_recall": 0.2049398850561697, "rougeL_recall_stderr": 0.003055116119810891, "rougeLsum_fmeasure": 0.10415585675049703, "rougeLsum_fmeasure_stderr": 0.0018023677161240045, "rougeLsum_precision": 0.07393255459872417, "rougeLsum_precision_stderr": 0.001314680148354152, "rougeLsum_recall": 0.18411144544228827, "rougeLsum_recall_stderr": 0.0031904748886971623}, "article_DOC_summary": {"bleu": 0.9574482759158731, "bleu_stderr": 0.08037361986575367, "rouge1_fmeasure": 0.1538448102106363, "rouge1_fmeasure_stderr": 0.0023116204942085577, "rouge1_precision": 0.10993607957935606, "rouge1_precision_stderr": 0.0017262014910658778, "rouge1_recall": 0.2671733452707944, "rouge1_recall_stderr": 0.00390137507010999, "rouge2_fmeasure": 0.02588589928345875, "rouge2_fmeasure_stderr": 0.0011070282134188164, "rouge2_precision": 0.01835130554145112, "rouge2_precision_stderr": 0.0007993797195400837, "rouge2_recall": 0.04604209148970363, "rouge2_recall_stderr": 0.001973243132477277, "rougeL_fmeasure": 0.12685128021061992, "rougeL_fmeasure_stderr": 0.0017022653082941958, "rougeL_precision": 0.09053264538484676, "rougeL_precision_stderr": 0.0012769612144616453, "rougeL_recall": 0.22129938057010246, "rougeL_recall_stderr": 0.00293788000817368, "rougeLsum_fmeasure": 0.11937933477851356, "rougeLsum_fmeasure_stderr": 0.0018740476749061043, "rougeLsum_precision": 0.08513932826931309, "rougeLsum_precision_stderr": 0.0013864967970779066, "rougeLsum_recall": 0.208617316083849, "rougeLsum_recall_stderr": 0.003259825355111851}, "summarize_DOC": {"bleu": 0.827022947248256, "bleu_stderr": 0.07927582653684777, "rouge1_fmeasure": 0.13477841342912497, "rouge1_fmeasure_stderr": 0.002063346031228702, "rouge1_precision": 0.09630367276935313, "rouge1_precision_stderr": 0.001534883053115542, "rouge1_recall": 0.23460591986325105, "rouge1_recall_stderr": 0.0035249062730580206, "rouge2_fmeasure": 0.02239979264930804, "rouge2_fmeasure_stderr": 0.000997880305154405, "rouge2_precision": 0.015863901454312258, "rouge2_precision_stderr": 0.000709892449539515, "rouge2_recall": 0.03990372817053524, "rouge2_recall_stderr": 0.0018147573204489273, "rougeL_fmeasure": 0.11875101756591155, "rougeL_fmeasure_stderr": 0.0016571866332250852, "rougeL_precision": 0.08476913174599572, "rougeL_precision_stderr": 0.0012339605708654188, "rougeL_recall": 0.20729064806304343, "rougeL_recall_stderr": 0.0028798441356743133, "rougeLsum_fmeasure": 0.10504435524250771, "rougeLsum_fmeasure_stderr": 0.0016540156460037813, "rougeLsum_precision": 0.07482920590283132, "rougeLsum_precision_stderr": 0.0012132438830642453, "rougeLsum_recall": 0.1845581778085984, "rougeLsum_recall_stderr": 0.0029597709556143095}, "summarize_this_DOC_summary": {"bleu": 0.938533817434639, "bleu_stderr": 0.06661804850792503, "rouge1_fmeasure": 0.14062273915493637, "rouge1_fmeasure_stderr": 0.0022000708799639716, "rouge1_precision": 0.10015017289953904, "rouge1_precision_stderr": 0.0016276274188076708, "rouge1_recall": 0.24643756064476016, "rouge1_recall_stderr": 0.003818429494069263, "rouge2_fmeasure": 0.02364976404549372, "rouge2_fmeasure_stderr": 0.0010941164002693394, "rouge2_precision": 0.016701004797808148, "rouge2_precision_stderr": 0.0007833139485381877, "rouge2_recall": 0.04255250278955721, "rouge2_recall_stderr": 0.0020010362587043313, "rougeL_fmeasure": 0.12001163311402531, "rougeL_fmeasure_stderr": 0.0017191781997495605, "rougeL_precision": 0.08541568567160791, "rougeL_precision_stderr": 0.0012793972003565766, "rougeL_recall": 0.21080805741967854, "rougeL_recall_stderr": 0.0030118865157834174, "rougeLsum_fmeasure": 0.10884604959000686, "rougeLsum_fmeasure_stderr": 0.0017934244319739035, "rougeLsum_precision": 0.07735151668808841, "rougeLsum_precision_stderr": 0.0013159109974304754, "rougeLsum_recall": 0.1920023167499489, "rougeLsum_recall_stderr": 0.0032002989755440677}}, "1": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.4956718515669784, "bleu_stderr": 0.06144134252581695, "rouge1_fmeasure": 0.13006038778542497, "rouge1_fmeasure_stderr": 0.002066345113965779, "rouge1_precision": 0.11321015578844655, "rouge1_precision_stderr": 0.0018833111961485554, "rouge1_recall": 0.18538730900428743, "rouge1_recall_stderr": 0.0037105443176058024, "rouge2_fmeasure": 0.010073796253903308, "rouge2_fmeasure_stderr": 0.0007521556541790673, "rouge2_precision": 0.008030799491936448, "rouge2_precision_stderr": 0.0005974414949521437, "rouge2_recall": 0.01630658025640146, "rouge2_recall_stderr": 0.0013333260688818906, "rougeL_fmeasure": 0.09662643610919455, "rougeL_fmeasure_stderr": 0.001430668974086877, "rougeL_precision": 0.0852182314237414, "rougeL_precision_stderr": 0.0014284391091660574, "rougeL_recall": 0.13603732951225597, "rougeL_recall_stderr": 0.0025190738181756775, "rougeLsum_fmeasure": 0.1033309158326748, "rougeLsum_fmeasure_stderr": 0.00161329986842399, "rougeLsum_precision": 0.09002789354250691, "rougeLsum_precision_stderr": 0.0014892889301457962, "rougeLsum_recall": 0.14770301407061567, "rougeLsum_recall_stderr": 0.002942468251426108}, "DOC_tldr": {"bleu": 0.7674350493376833, "bleu_stderr": 0.04722261703727283, "rouge1_fmeasure": 0.13999313273083266, "rouge1_fmeasure_stderr": 0.0022002140826437623, "rouge1_precision": 0.11994693258726821, "rouge1_precision_stderr": 0.002272664862037966, "rouge1_recall": 0.2063208707620612, "rouge1_recall_stderr": 0.003701405279713943, "rouge2_fmeasure": 0.018246524077524105, "rouge2_fmeasure_stderr": 0.0009676889512530071, "rouge2_precision": 0.01473220123107267, "rouge2_precision_stderr": 0.0008753263893803662, "rouge2_recall": 0.029347661070468774, "rouge2_recall_stderr": 0.0015701480680528376, "rougeL_fmeasure": 0.11473299946041993, "rougeL_fmeasure_stderr": 0.0016673962826521769, "rougeL_precision": 0.097949771945427, "rougeL_precision_stderr": 0.0017742187935061273, "rougeL_recall": 0.17075558002497426, "rougeL_recall_stderr": 0.0029345772747996486, "rougeLsum_fmeasure": 0.10940608132454639, "rougeLsum_fmeasure_stderr": 0.0016933691201315847, "rougeLsum_precision": 0.09414781020562817, "rougeLsum_precision_stderr": 0.0018145520881081784, "rougeLsum_recall": 0.16167126991391795, "rougeLsum_recall_stderr": 0.0029104592060735126}, "article_DOC_summary": {"bleu": 0.8464028940550716, "bleu_stderr": 0.12057452438636702, "rouge1_fmeasure": 0.1452725178193069, "rouge1_fmeasure_stderr": 0.002296281448425658, "rouge1_precision": 0.12072537979214573, "rouge1_precision_stderr": 0.002167454500355592, "rouge1_recall": 0.21838713615156446, "rouge1_recall_stderr": 0.0038259542032858217, "rouge2_fmeasure": 0.01908194699059756, "rouge2_fmeasure_stderr": 0.0010479813934091, "rouge2_precision": 0.014777621125809935, "rouge2_precision_stderr": 0.0008637330993774701, "rouge2_recall": 0.03154673810471188, "rouge2_recall_stderr": 0.001733277417697726, "rougeL_fmeasure": 0.11573111078031734, "rougeL_fmeasure_stderr": 0.001668882154190853, "rougeL_precision": 0.09551583464454579, "rougeL_precision_stderr": 0.0015465079617143938, "rougeL_recall": 0.17601089949518756, "rougeL_recall_stderr": 0.0029605535227822544, "rougeLsum_fmeasure": 0.11300827952303219, "rougeLsum_fmeasure_stderr": 0.0017954449161295192, "rougeLsum_precision": 0.09358138198842439, "rougeLsum_precision_stderr": 0.0016368426037437229, "rougeLsum_recall": 0.1713680364199079, "rougeLsum_recall_stderr": 0.003137326232956779}, "summarize_DOC": {"bleu": 0.9250777542928637, "bleu_stderr": 0.0995464024191013, "rouge1_fmeasure": 0.13585887003860755, "rouge1_fmeasure_stderr": 0.002175476639683441, "rouge1_precision": 0.09967812302039447, "rouge1_precision_stderr": 0.0017535656187891884, "rouge1_recall": 0.23532487213764122, "rouge1_recall_stderr": 0.0037931530072197895, "rouge2_fmeasure": 0.02178998612851725, "rouge2_fmeasure_stderr": 0.001087232464799662, "rouge2_precision": 0.015458406614911558, "rouge2_precision_stderr": 0.0007847122204995969, "rouge2_recall": 0.03955801765769148, "rouge2_recall_stderr": 0.002028121799206321, "rougeL_fmeasure": 0.1159863670452197, "rougeL_fmeasure_stderr": 0.0016981881580244976, "rougeL_precision": 0.08499045094039762, "rougeL_precision_stderr": 0.001391372178337716, "rougeL_recall": 0.20179056951969515, "rougeL_recall_stderr": 0.003044247136206819, "rougeLsum_fmeasure": 0.1064016446915024, "rougeLsum_fmeasure_stderr": 0.0017712110403532188, "rougeLsum_precision": 0.07807531947048092, "rougeLsum_precision_stderr": 0.0014381437479792241, "rougeLsum_recall": 0.1855216996322975, "rougeLsum_recall_stderr": 0.003180975541626223}, "summarize_this_DOC_summary": {"bleu": 1.0178456834586884, "bleu_stderr": 0.09075195516691038, "rouge1_fmeasure": 0.15267367243026655, "rouge1_fmeasure_stderr": 0.0024154109938806533, "rouge1_precision": 0.12052574025109639, "rouge1_precision_stderr": 0.0022210016746994497, "rouge1_recall": 0.24517047230002495, "rouge1_recall_stderr": 0.004088354034068248, "rouge2_fmeasure": 0.023239358291017316, "rouge2_fmeasure_stderr": 0.001176157253837938, "rouge2_precision": 0.01753912194656959, "rouge2_precision_stderr": 0.0009726986505547985, "rouge2_recall": 0.039922933252291584, "rouge2_recall_stderr": 0.0019893681203709793, "rougeL_fmeasure": 0.12230123546095711, "rougeL_fmeasure_stderr": 0.0017734340030687993, "rougeL_precision": 0.09629033131871302, "rougeL_precision_stderr": 0.0016769870522745736, "rougeL_recall": 0.19827068105870446, "rougeL_recall_stderr": 0.0031512156971516287, "rougeLsum_fmeasure": 0.11886986067012077, "rougeLsum_fmeasure_stderr": 0.0019377362527584576, "rougeLsum_precision": 0.09385616207958877, "rougeLsum_precision_stderr": 0.001782715945523954, "rougeLsum_recall": 0.19236967599868202, "rougeLsum_recall_stderr": 0.003399193457550934}}, "2": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.4649248578493213, "bleu_stderr": 0.0713213099450011, "rouge1_fmeasure": 0.1321353013501063, "rouge1_fmeasure_stderr": 0.0020733569592054284, "rouge1_precision": 0.115303289612093, "rouge1_precision_stderr": 0.0019631897374271976, "rouge1_recall": 0.1867727451057763, "rouge1_recall_stderr": 0.00361199337825402, "rouge2_fmeasure": 0.011705506769985538, "rouge2_fmeasure_stderr": 0.0007650466477809864, "rouge2_precision": 0.009371984198219677, "rouge2_precision_stderr": 0.0006185339230947803, "rouge2_recall": 0.018833588882257533, "rouge2_recall_stderr": 0.0013508882388750508, "rougeL_fmeasure": 0.098676375925058, "rougeL_fmeasure_stderr": 0.0014409776012497158, "rougeL_precision": 0.08671980544288431, "rougeL_precision_stderr": 0.0014435281648377727, "rougeL_recall": 0.13855429960506976, "rougeL_recall_stderr": 0.002496409762111219, "rougeLsum_fmeasure": 0.10364656157003836, "rougeLsum_fmeasure_stderr": 0.0015811434930502115, "rougeLsum_precision": 0.0903490116138123, "rougeLsum_precision_stderr": 0.0014955778687851201, "rougeLsum_recall": 0.14710100552982236, "rougeLsum_recall_stderr": 0.002836465397213484}, "DOC_tldr": {"bleu": 0.9152942535263101, "bleu_stderr": 0.11054552948281171, "rouge1_fmeasure": 0.1432459780290282, "rouge1_fmeasure_stderr": 0.0023213704285313403, "rouge1_precision": 0.1272859637960522, "rouge1_precision_stderr": 0.0027462425850244753, "rouge1_recall": 0.20726108396447318, "rouge1_recall_stderr": 0.003480174389659312, "rouge2_fmeasure": 0.020808658216032574, "rouge2_fmeasure_stderr": 0.001109865173400727, "rouge2_precision": 0.01752090200988848, "rouge2_precision_stderr": 0.0010115578307486322, "rouge2_recall": 0.032264766808514124, "rouge2_recall_stderr": 0.0017258708804944215, "rougeL_fmeasure": 0.11786898777293418, "rougeL_fmeasure_stderr": 0.0017897922596018406, "rougeL_precision": 0.10342334775733361, "rougeL_precision_stderr": 0.002079516722716056, "rougeL_recall": 0.17360666431954774, "rougeL_recall_stderr": 0.0028928707472305787, "rougeLsum_fmeasure": 0.1127637907926372, "rougeLsum_fmeasure_stderr": 0.0018017461138697946, "rougeLsum_precision": 0.09983622506749402, "rougeLsum_precision_stderr": 0.002115393492482011, "rougeLsum_recall": 0.16461169268640363, "rougeLsum_recall_stderr": 0.002819701566946129}, "article_DOC_summary": {"bleu": 0.7984106419011859, "bleu_stderr": 0.1063368416287446, "rouge1_fmeasure": 0.14627685627701034, "rouge1_fmeasure_stderr": 0.0023199421629749627, "rouge1_precision": 0.1277087568466237, "rouge1_precision_stderr": 0.002600936480819687, "rouge1_recall": 0.21240423763263364, "rouge1_recall_stderr": 0.003539650124058849, "rouge2_fmeasure": 0.01990450612056915, "rouge2_fmeasure_stderr": 0.0010553925331886055, "rouge2_precision": 0.016452983669055295, "rouge2_precision_stderr": 0.0009373447546795308, "rouge2_recall": 0.031084003624943502, "rouge2_recall_stderr": 0.0016581096554683838, "rougeL_fmeasure": 0.11915492784443057, "rougeL_fmeasure_stderr": 0.0017454981401993368, "rougeL_precision": 0.10316942850814137, "rougeL_precision_stderr": 0.001971701438481884, "rougeL_recall": 0.17539290041445985, "rougeL_recall_stderr": 0.002825968625575816, "rougeLsum_fmeasure": 0.11487832313530352, "rougeLsum_fmeasure_stderr": 0.0018236145830277607, "rougeLsum_precision": 0.10014675747700362, "rougeLsum_precision_stderr": 0.0020339193953665207, "rougeLsum_recall": 0.16798366392500885, "rougeLsum_recall_stderr": 0.002901019287841266}, "summarize_DOC": {"bleu": 0.7540448660432285, "bleu_stderr": 0.09539115530526798, "rouge1_fmeasure": 0.1349531450843222, "rouge1_fmeasure_stderr": 0.0020232857713247897, "rouge1_precision": 0.09893709052188565, "rouge1_precision_stderr": 0.0017414703745454985, "rouge1_recall": 0.23440267735863968, "rouge1_recall_stderr": 0.003498471803075341, "rouge2_fmeasure": 0.020699762711342613, "rouge2_fmeasure_stderr": 0.0009604579863830597, "rouge2_precision": 0.015158067141100342, "rouge2_precision_stderr": 0.0008079828861487166, "rouge2_recall": 0.037206559662769104, "rouge2_recall_stderr": 0.0017788028367721072, "rougeL_fmeasure": 0.11632088160061539, "rougeL_fmeasure_stderr": 0.001583707769523938, "rougeL_precision": 0.08507459833948786, "rougeL_precision_stderr": 0.0013961283150575271, "rougeL_recall": 0.20301299123076172, "rougeL_recall_stderr": 0.0028262793046608258, "rougeLsum_fmeasure": 0.10493620226111898, "rougeLsum_fmeasure_stderr": 0.0016118116058243702, "rougeLsum_precision": 0.07692952970836399, "rougeLsum_precision_stderr": 0.001416058779535236, "rougeLsum_recall": 0.1833856652655966, "rougeLsum_recall_stderr": 0.0028828605595491133}, "summarize_this_DOC_summary": {"bleu": 1.0056063433565963, "bleu_stderr": 0.12502919332145682, "rouge1_fmeasure": 0.15283133102375288, "rouge1_fmeasure_stderr": 0.002341223307257669, "rouge1_precision": 0.1290901025083865, "rouge1_precision_stderr": 0.0025827449855700116, "rouge1_recall": 0.2345962358974404, "rouge1_recall_stderr": 0.0037295243525013715, "rouge2_fmeasure": 0.02358829661609317, "rouge2_fmeasure_stderr": 0.0011830288079803606, "rouge2_precision": 0.019485063384407045, "rouge2_precision_stderr": 0.0011129553201441335, "rouge2_recall": 0.03790567635691087, "rouge2_recall_stderr": 0.0019138824398120548, "rougeL_fmeasure": 0.12562572604910113, "rougeL_fmeasure_stderr": 0.0017798211682304972, "rougeL_precision": 0.10570526084215368, "rougeL_precision_stderr": 0.0020534248709442697, "rougeL_recall": 0.19497376652941362, "rougeL_recall_stderr": 0.003002241641214748, "rougeLsum_fmeasure": 0.11974611561902486, "rougeLsum_fmeasure_stderr": 0.0018765281940315045, "rougeLsum_precision": 0.10165398752195147, "rougeLsum_precision_stderr": 0.0021383205417301585, "rougeLsum_recall": 0.18432216882062125, "rougeLsum_recall_stderr": 0.003052415691915719}}, "3": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.6897184873606464, "bleu_stderr": 0.09929118591535131, "rouge1_fmeasure": 0.12726617865153672, "rouge1_fmeasure_stderr": 0.0023059251743363816, "rouge1_precision": 0.11030819972819507, "rouge1_precision_stderr": 0.002126312863119683, "rouge1_recall": 0.18142324171289778, "rouge1_recall_stderr": 0.003822441819750713, "rouge2_fmeasure": 0.012326768637832572, "rouge2_fmeasure_stderr": 0.0008208778930401783, "rouge2_precision": 0.00966434257562582, "rouge2_precision_stderr": 0.0006608656525792718, "rouge2_recall": 0.019946294126745322, "rouge2_recall_stderr": 0.0013598720985991016, "rougeL_fmeasure": 0.09508927308463527, "rougeL_fmeasure_stderr": 0.001653234972150023, "rougeL_precision": 0.0829643367413115, "rougeL_precision_stderr": 0.0015835310219049267, "rougeL_recall": 0.13470266239538448, "rougeL_recall_stderr": 0.002713889010739294, "rougeLsum_fmeasure": 0.10033551040789061, "rougeLsum_fmeasure_stderr": 0.0017901125661630923, "rougeLsum_precision": 0.08664042360478007, "rougeLsum_precision_stderr": 0.0016238957338166923, "rougeLsum_recall": 0.14392387149210184, "rougeLsum_recall_stderr": 0.003047831244734627}, "DOC_tldr": {"bleu": 0.8295010385712954, "bleu_stderr": 0.10729215881030962, "rouge1_fmeasure": 0.14064423672321408, "rouge1_fmeasure_stderr": 0.0025293681234337334, "rouge1_precision": 0.1289330354185616, "rouge1_precision_stderr": 0.0030022141554972227, "rouge1_recall": 0.19771559769715022, "rouge1_recall_stderr": 0.0036123774127182606, "rouge2_fmeasure": 0.02056912761205674, "rouge2_fmeasure_stderr": 0.0010455547309245617, "rouge2_precision": 0.018171280861945326, "rouge2_precision_stderr": 0.0009931234243246655, "rouge2_recall": 0.030785833829197685, "rouge2_recall_stderr": 0.001613050728074892, "rougeL_fmeasure": 0.11535113963082101, "rougeL_fmeasure_stderr": 0.0019250312942772209, "rougeL_precision": 0.10432784481750776, "rougeL_precision_stderr": 0.0022625245110963936, "rougeL_recall": 0.16542514116729834, "rougeL_recall_stderr": 0.002987101666461712, "rougeLsum_fmeasure": 0.11133583672050468, "rougeLsum_fmeasure_stderr": 0.0019306189975949863, "rougeLsum_precision": 0.1017214982197284, "rougeLsum_precision_stderr": 0.0023262186712280047, "rougeLsum_recall": 0.15836180866185806, "rougeLsum_recall_stderr": 0.0029184359176326665}, "article_DOC_summary": {"bleu": 0.9423552122227806, "bleu_stderr": 0.13782619268270152, "rouge1_fmeasure": 0.1467279970360384, "rouge1_fmeasure_stderr": 0.0026790925158139463, "rouge1_precision": 0.13645821087841936, "rouge1_precision_stderr": 0.0030874846619215407, "rouge1_recall": 0.20065597941747373, "rouge1_recall_stderr": 0.0036985721066628736, "rouge2_fmeasure": 0.02153589403065349, "rouge2_fmeasure_stderr": 0.00118190969520155, "rouge2_precision": 0.019488958963057586, "rouge2_precision_stderr": 0.0011775452187260563, "rouge2_recall": 0.030961861144012604, "rouge2_recall_stderr": 0.0017081744279510592, "rougeL_fmeasure": 0.11938767343543244, "rougeL_fmeasure_stderr": 0.002073060283013559, "rougeL_precision": 0.11007763632391766, "rougeL_precision_stderr": 0.002410392914612521, "rougeL_recall": 0.1663257674332732, "rougeL_recall_stderr": 0.0030787485034055036, "rougeLsum_fmeasure": 0.11495841768374035, "rougeLsum_fmeasure_stderr": 0.0021071380507211635, "rougeLsum_precision": 0.10704427774578769, "rougeLsum_precision_stderr": 0.0024643770024296028, "rougeLsum_recall": 0.15838825460379957, "rougeLsum_recall_stderr": 0.0030224100131612764}, "summarize_DOC": {"bleu": 0.8519606095372889, "bleu_stderr": 0.10081770314780626, "rouge1_fmeasure": 0.1288656936450777, "rouge1_fmeasure_stderr": 0.002285142640984343, "rouge1_precision": 0.09822153128767802, "rouge1_precision_stderr": 0.0021428329923455346, "rouge1_recall": 0.2173704671442931, "rouge1_recall_stderr": 0.003785275093896278, "rouge2_fmeasure": 0.02079595560428378, "rouge2_fmeasure_stderr": 0.0010061139041632509, "rouge2_precision": 0.015471263083193475, "rouge2_precision_stderr": 0.0007775392244828585, "rouge2_recall": 0.03648134817348442, "rouge2_recall_stderr": 0.0018073965169390764, "rougeL_fmeasure": 0.11230658338330175, "rougeL_fmeasure_stderr": 0.001849039452146557, "rougeL_precision": 0.08524370672290423, "rougeL_precision_stderr": 0.0017456965733000762, "rougeL_recall": 0.19064982924857338, "rougeL_recall_stderr": 0.0031548397347180086, "rougeLsum_fmeasure": 0.10101095703467147, "rougeLsum_fmeasure_stderr": 0.001798939254091859, "rougeLsum_precision": 0.07709178118688563, "rougeLsum_precision_stderr": 0.0017340953216017302, "rougeLsum_recall": 0.1714803560163439, "rougeLsum_recall_stderr": 0.0030653620428528205}, "summarize_this_DOC_summary": {"bleu": 1.033576975690131, "bleu_stderr": 0.10612867731517484, "rouge1_fmeasure": 0.14887450442636246, "rouge1_fmeasure_stderr": 0.00274448239030748, "rouge1_precision": 0.13244835829967186, "rouge1_precision_stderr": 0.0030908517791536955, "rouge1_recall": 0.212588522071666, "rouge1_recall_stderr": 0.003862810884401867, "rouge2_fmeasure": 0.02410030299641933, "rouge2_fmeasure_stderr": 0.0013458556293409396, "rouge2_precision": 0.021448284322085266, "rouge2_precision_stderr": 0.001389010296371262, "rouge2_recall": 0.034841891806055784, "rouge2_recall_stderr": 0.001852154768566367, "rougeL_fmeasure": 0.1217506880628755, "rougeL_fmeasure_stderr": 0.002161955887082069, "rougeL_precision": 0.10749771032588204, "rougeL_precision_stderr": 0.0024493472667949895, "rougeL_recall": 0.17604861924046014, "rougeL_recall_stderr": 0.0031652710323202445, "rougeLsum_fmeasure": 0.11764148962890421, "rougeLsum_fmeasure_stderr": 0.0022216493805243694, "rougeLsum_precision": 0.104633566179651, "rougeLsum_precision_stderr": 0.002509395651495409, "rougeLsum_recall": 0.1689633360044492, "rougeLsum_recall_stderr": 0.003201254184241733}}, "4": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.18440179296896278, "bleu_stderr": 0.034342854775642216, "rouge1_fmeasure": 0.032825069850753535, "rouge1_fmeasure_stderr": 0.002115896509151276, "rouge1_precision": 0.03312872880982003, "rouge1_precision_stderr": 0.0023364937989206456, "rouge1_recall": 0.04331842400385379, "rouge1_recall_stderr": 0.0029785608067230333, "rouge2_fmeasure": 0.0045358862257299385, "rouge2_fmeasure_stderr": 0.000680391156202852, "rouge2_precision": 0.004031880720388147, "rouge2_precision_stderr": 0.0007055581272082209, "rouge2_recall": 0.006940542757637712, "rouge2_recall_stderr": 0.0010097939996129512, "rougeL_fmeasure": 0.02557068204061304, "rougeL_fmeasure_stderr": 0.0016585142146009432, "rougeL_precision": 0.02612465010671101, "rougeL_precision_stderr": 0.0018898939348251488, "rougeL_recall": 0.033505823824258554, "rougeL_recall_stderr": 0.002305244406205452, "rougeLsum_fmeasure": 0.026530666924173267, "rougeLsum_fmeasure_stderr": 0.0017221175504394289, "rougeLsum_precision": 0.02683188172054129, "rougeLsum_precision_stderr": 0.0019196788856535153, "rougeLsum_recall": 0.035158918419347346, "rougeLsum_recall_stderr": 0.0024488884244711472}, "DOC_tldr": {"bleu": 0.2878314198644634, "bleu_stderr": 0.09522067958952156, "rouge1_fmeasure": 0.039198145576751886, "rouge1_fmeasure_stderr": 0.0022897766752655114, "rouge1_precision": 0.04257852396991355, "rouge1_precision_stderr": 0.002914976483248318, "rouge1_recall": 0.05044280044044616, "rouge1_recall_stderr": 0.0030109427993094294, "rouge2_fmeasure": 0.005605133916804629, "rouge2_fmeasure_stderr": 0.000674983978729807, "rouge2_precision": 0.005213726639706752, "rouge2_precision_stderr": 0.0007421737476799913, "rouge2_recall": 0.007806425202197695, "rouge2_recall_stderr": 0.0009097455571182794, "rougeL_fmeasure": 0.03233599361611141, "rougeL_fmeasure_stderr": 0.0018540861576038941, "rougeL_precision": 0.035672949988739856, "rougeL_precision_stderr": 0.002547892797474333, "rougeL_recall": 0.042169944453916135, "rougeL_recall_stderr": 0.002493645678486285, "rougeLsum_fmeasure": 0.03127361667218934, "rougeLsum_fmeasure_stderr": 0.001818299978680775, "rougeLsum_precision": 0.03487509346325613, "rougeLsum_precision_stderr": 0.0025355122945714846, "rougeLsum_recall": 0.04044744995699889, "rougeLsum_recall_stderr": 0.002420429517483526}, "article_DOC_summary": {"bleu": 0.17189105743351635, "bleu_stderr": 0.038682318528850144, "rouge1_fmeasure": 0.039591317930521744, "rouge1_fmeasure_stderr": 0.0022931685649421212, "rouge1_precision": 0.04085291524121532, "rouge1_precision_stderr": 0.002589850198863279, "rouge1_recall": 0.049526509450874855, "rouge1_recall_stderr": 0.00300127990043923, "rouge2_fmeasure": 0.005510795996651232, "rouge2_fmeasure_stderr": 0.0006213462695311744, "rouge2_precision": 0.005319857832468002, "rouge2_precision_stderr": 0.0007156080942971114, "rouge2_recall": 0.007574332469978282, "rouge2_recall_stderr": 0.0009017550685095564, "rougeL_fmeasure": 0.03214999836248129, "rougeL_fmeasure_stderr": 0.0018423539820022828, "rougeL_precision": 0.03358813626286002, "rougeL_precision_stderr": 0.002206945445578637, "rougeL_recall": 0.04060546427390794, "rougeL_recall_stderr": 0.0024697862019406065, "rougeLsum_fmeasure": 0.03146184764090875, "rougeLsum_fmeasure_stderr": 0.0018112412925546117, "rougeLsum_precision": 0.03309676277901219, "rougeLsum_precision_stderr": 0.002199818877733498, "rougeLsum_recall": 0.03950562122200865, "rougeLsum_recall_stderr": 0.0024124824570383946}, "summarize_DOC": {"bleu": 0.311490095229844, "bleu_stderr": 0.08056516332123577, "rouge1_fmeasure": 0.0392783960707505, "rouge1_fmeasure_stderr": 0.0022527501571849374, "rouge1_precision": 0.03689476483753298, "rouge1_precision_stderr": 0.0024463384773361955, "rouge1_recall": 0.05735145180342211, "rouge1_recall_stderr": 0.003370593307800384, "rouge2_fmeasure": 0.006225565560992794, "rouge2_fmeasure_stderr": 0.0006675181095595146, "rouge2_precision": 0.005453105497796419, "rouge2_precision_stderr": 0.0007237955340670446, "rouge2_recall": 0.009826935945674431, "rouge2_recall_stderr": 0.001076383576002216, "rougeL_fmeasure": 0.03280903331157746, "rougeL_fmeasure_stderr": 0.0018180599330376993, "rougeL_precision": 0.030609500618293827, "rougeL_precision_stderr": 0.00201564752141265, "rougeL_recall": 0.04862705218381443, "rougeL_recall_stderr": 0.0027960780471146885, "rougeLsum_fmeasure": 0.030578597001076086, "rougeLsum_fmeasure_stderr": 0.0017593294917667497, "rougeLsum_precision": 0.028977502639200816, "rougeLsum_precision_stderr": 0.001997201119048309, "rougeLsum_recall": 0.04481380017813916, "rougeLsum_recall_stderr": 0.0026573416129651475}, "summarize_this_DOC_summary": {"bleu": 0.17410955528806296, "bleu_stderr": 0.035584087070912734, "rouge1_fmeasure": 0.03807978716467659, "rouge1_fmeasure_stderr": 0.0023526154852758037, "rouge1_precision": 0.03956432039685865, "rouge1_precision_stderr": 0.002695013853025962, "rouge1_recall": 0.0492235193132569, "rouge1_recall_stderr": 0.003192852760395571, "rouge2_fmeasure": 0.006031590914671055, "rouge2_fmeasure_stderr": 0.0006648715199487534, "rouge2_precision": 0.006014101598531974, "rouge2_precision_stderr": 0.0007964027992614052, "rouge2_recall": 0.008256986942409495, "rouge2_recall_stderr": 0.0009340954377351208, "rougeL_fmeasure": 0.030352247654815704, "rougeL_fmeasure_stderr": 0.0018302961576454654, "rougeL_precision": 0.03171603699831025, "rougeL_precision_stderr": 0.0021853394627631482, "rougeL_recall": 0.04009236083483906, "rougeL_recall_stderr": 0.0026034873427392777, "rougeLsum_fmeasure": 0.029887550353590413, "rougeLsum_fmeasure_stderr": 0.0018411395918680518, "rougeLsum_precision": 0.031509274099563875, "rougeLsum_precision_stderr": 0.002203992098896614, "rougeLsum_recall": 0.03906347934555628, "rougeLsum_recall_stderr": 0.0025972716725576332}}, "5": {"DOC_boils_down_to_simple_idea_that": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0003772463816858933, "rouge1_fmeasure_stderr": 0.00018869438087139786, "rouge1_precision": 0.003430531732418525, "rouge1_precision_stderr": 0.0017130559457731933, "rouge1_recall": 0.0001996386258755556, "rouge1_recall_stderr": 9.987616180882829e-05, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0003772463816858933, "rougeL_fmeasure_stderr": 0.00018869438087139786, "rougeL_precision": 0.003430531732418525, "rougeL_precision_stderr": 0.0017130559457731933, "rougeL_recall": 0.0001996386258755556, "rougeL_recall_stderr": 9.987616180882829e-05, "rougeLsum_fmeasure": 0.0003772463816858933, "rougeLsum_fmeasure_stderr": 0.00018869438087139786, "rougeLsum_precision": 0.003430531732418525, "rougeLsum_precision_stderr": 0.0017130559457731933, "rougeLsum_recall": 0.0001996386258755556, "rougeLsum_recall_stderr": 9.987616180882829e-05}, "DOC_tldr": {"bleu": 3.539919827428958e-43, "bleu_stderr": 2.1011888360679252e-37, "rouge1_fmeasure": 0.0023996080385096956, "rouge1_fmeasure_stderr": 0.0006585056185108358, "rouge1_precision": 0.002936256429086033, "rouge1_precision_stderr": 0.0008325661288169507, "rouge1_recall": 0.0021595962684425826, "rouge1_recall_stderr": 0.0005981401663289608, "rouge2_fmeasure": 0.0003713888095226837, "rouge2_fmeasure_stderr": 0.00017882541746315305, "rouge2_precision": 0.00041274393383161417, "rouge2_precision_stderr": 0.0001973715747410765, "rouge2_recall": 0.0003399001109877913, "rouge2_recall_stderr": 0.000164471497611397, "rougeL_fmeasure": 0.0019987192664100155, "rougeL_fmeasure_stderr": 0.0005427413769701772, "rougeL_precision": 0.0024675512225894843, "rougeL_precision_stderr": 0.000714409976176186, "rougeL_recall": 0.0017999664297352442, "rougeL_recall_stderr": 0.0004895093404742078, "rougeLsum_fmeasure": 0.0021468678403871737, "rougeLsum_fmeasure_stderr": 0.0005854611584876265, "rougeLsum_precision": 0.002632861772195261, "rougeLsum_precision_stderr": 0.000753763054701241, "rougeLsum_recall": 0.001937003366161748, "rougeLsum_recall_stderr": 0.000531587586598376}, "article_DOC_summary": {"bleu": 3.736787794674515e-39, "bleu_stderr": 8.045576123348552e-33, "rouge1_fmeasure": 0.0031357253217098413, "rouge1_fmeasure_stderr": 0.0008645982383809647, "rouge1_precision": 0.0035305925957395084, "rouge1_precision_stderr": 0.0009785490984386396, "rouge1_recall": 0.0030440980529733517, "rouge1_recall_stderr": 0.0008638602364047491, "rouge2_fmeasure": 0.0007075672984960149, "rouge2_fmeasure_stderr": 0.0003298891212277737, "rouge2_precision": 0.0007350839134967882, "rouge2_precision_stderr": 0.00032922794052150407, "rouge2_recall": 0.0006963162623539982, "rouge2_recall_stderr": 0.0003353629065457281, "rougeL_fmeasure": 0.0025693656488664164, "rougeL_fmeasure_stderr": 0.0007105243349554819, "rougeL_precision": 0.0028741250344439784, "rougeL_precision_stderr": 0.0007961665239105527, "rougeL_recall": 0.0025236529245087863, "rougeL_recall_stderr": 0.0007285140262655341, "rougeLsum_fmeasure": 0.0027055779382418573, "rougeLsum_fmeasure_stderr": 0.0007426343024247237, "rougeLsum_precision": 0.0030230196408857547, "rougeLsum_precision_stderr": 0.0008304280978614765, "rougeLsum_recall": 0.0026492658288523934, "rougeLsum_recall_stderr": 0.0007552587612647047}, "summarize_DOC": {"bleu": 8.81729878858539e-39, "bleu_stderr": 2.936570585229035e-33, "rouge1_fmeasure": 0.002421692802983106, "rouge1_fmeasure_stderr": 0.0006764738834041464, "rouge1_precision": 0.0027508790221273424, "rouge1_precision_stderr": 0.0007706396307316284, "rouge1_recall": 0.0022719827482217438, "rouge1_recall_stderr": 0.000646293142327662, "rouge2_fmeasure": 0.0004607621449381724, "rouge2_fmeasure_stderr": 0.00020508003752744115, "rouge2_precision": 0.0004918476824692141, "rouge2_precision_stderr": 0.00020999706938142754, "rouge2_recall": 0.00044210136884942656, "rouge2_recall_stderr": 0.00020463083076435227, "rougeL_fmeasure": 0.0021069927716341674, "rougeL_fmeasure_stderr": 0.0005727525693181224, "rougeL_precision": 0.0024136119947061004, "rougeL_precision_stderr": 0.0006725506737183709, "rougeL_recall": 0.0019698513077900763, "rougeL_recall_stderr": 0.0005402889527601966, "rougeLsum_fmeasure": 0.002169328410821175, "rougeLsum_fmeasure_stderr": 0.0006128588314949923, "rougeLsum_precision": 0.0024665686894058876, "rougeLsum_precision_stderr": 0.0007035053623232305, "rougeLsum_recall": 0.0020393354574629054, "rougeLsum_recall_stderr": 0.0005858747974607231}, "summarize_this_DOC_summary": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0008897900877168533, "rouge1_fmeasure_stderr": 0.00034150756554492286, "rouge1_precision": 0.003716409376786735, "rouge1_precision_stderr": 0.001337034178653581, "rouge1_recall": 0.0005091066357522025, "rouge1_recall_stderr": 0.0001976825729298653, "rouge2_fmeasure": 0.0001905850962454736, "rouge2_fmeasure_stderr": 0.00019058509624547674, "rouge2_precision": 0.0008576329331046312, "rouge2_precision_stderr": 0.0008576329331046335, "rouge2_recall": 0.0001072041166380789, "rouge2_recall_stderr": 0.00010720411663807919, "rougeL_fmeasure": 0.0008897900877168533, "rougeL_fmeasure_stderr": 0.00034150756554492286, "rougeL_precision": 0.003716409376786735, "rougeL_precision_stderr": 0.001337034178653581, "rougeL_recall": 0.0005091066357522025, "rougeL_recall_stderr": 0.0001976825729298653, "rougeLsum_fmeasure": 0.0008897900877168533, "rougeLsum_fmeasure_stderr": 0.00034150756554492286, "rougeLsum_precision": 0.003716409376786735, "rougeLsum_precision_stderr": 0.001337034178653581, "rougeLsum_recall": 0.0005091066357522025, "rougeLsum_recall_stderr": 0.0001976825729298653}}}, "piqa": {"0": {"Correct the solution": {"bleu": 3.8125985335008306, "bleu_stderr": 0.19585723695627313, "rouge1_fmeasure": 0.10026196967185438, "rouge1_fmeasure_stderr": 0.002577507472274814, "rouge1_precision": 0.06195211374786627, "rouge1_precision_stderr": 0.002094672822781696, "rouge1_recall": 0.5162593447897136, "rouge1_recall_stderr": 0.007535474730992951, "rouge2_fmeasure": 0.06739212009258444, "rouge2_fmeasure_stderr": 0.0024788238669206223, "rouge2_precision": 0.04159189415204392, "rouge2_precision_stderr": 0.0018730228506480733, "rouge2_recall": 0.34313550824678557, "rouge2_recall_stderr": 0.008028791094629359, "rougeL_fmeasure": 0.09782216479158326, "rougeL_fmeasure_stderr": 0.002562059753885468, "rougeL_precision": 0.06050388889344785, "rougeL_precision_stderr": 0.0020837386360429886, "rougeL_recall": 0.5050572660910998, "rougeL_recall_stderr": 0.0075928927820709475, "rougeLsum_fmeasure": 0.09420308480263866, "rougeLsum_fmeasure_stderr": 0.0025478112691233964, "rougeLsum_precision": 0.058339101160589345, "rougeLsum_precision_stderr": 0.0020763508063761675, "rougeLsum_recall": 0.49022104146755285, "rougeLsum_recall_stderr": 0.007674824806892895}, "choose the most appropriate solution": {"acc": 0.49891186071817195, "acc_norm": 0.49891186071817195, "acc_norm_stderr": 0.011665796539540878, "acc_stderr": 0.011665796539540878}, "no prompt needed": {"bleu": 0.08868063793537338, "bleu_stderr": 0.010679205176766326, "rouge1_fmeasure": 0.03326642877020731, "rouge1_fmeasure_stderr": 0.0009655071900921132, "rouge1_precision": 0.024079092011199548, "rouge1_precision_stderr": 0.0012751225464276315, "rouge1_recall": 0.16382504234512227, "rouge1_recall_stderr": 0.0033146808351779324, "rouge2_fmeasure": 0.0038833183768095616, "rouge2_fmeasure_stderr": 0.0002654112249994602, "rouge2_precision": 0.0026147181115056114, "rouge2_precision_stderr": 0.0002787850657177482, "rouge2_recall": 0.01942570519259786, "rouge2_recall_stderr": 0.0011345558550410505, "rougeL_fmeasure": 0.030279861281445327, "rougeL_fmeasure_stderr": 0.0008455385563357755, "rougeL_precision": 0.02173563771564973, "rougeL_precision_stderr": 0.0011135246337014074, "rougeL_recall": 0.1525533994924883, "rougeL_recall_stderr": 0.003123796083077344, "rougeLsum_fmeasure": 0.02719812824801991, "rougeLsum_fmeasure_stderr": 0.0007883831967628123, "rougeLsum_precision": 0.019933519868235043, "rougeLsum_precision_stderr": 0.0011006754681286612, "rougeLsum_recall": 0.1373792671164724, "rougeLsum_recall_stderr": 0.002829849172122982}, "pick_correct_choice_index": {"acc": 0.49510337323177367, "acc_norm": 0.49510337323177367, "acc_norm_stderr": 0.01166526473007815, "acc_stderr": 0.01166526473007815}, "what_is_the_correct_ending": {"acc": 0.5136017410228509, "acc_norm": 0.5179542981501633, "acc_norm_stderr": 0.011658300623287153, "acc_stderr": 0.01166150683982377}}, "1": {"Correct the solution": {"bleu": 4.984115818676601, "bleu_stderr": 0.2014952793991324, "rouge1_fmeasure": 0.18342088795857475, "rouge1_fmeasure_stderr": 0.005818959285067925, "rouge1_precision": 0.19442793157823965, "rouge1_precision_stderr": 0.00631830683765845, "rouge1_recall": 0.370171505230012, "rouge1_recall_stderr": 0.008099242120212022, "rouge2_fmeasure": 0.11307708650003363, "rouge2_fmeasure_stderr": 0.005231782535257262, "rouge2_precision": 0.11018136222612517, "rouge2_precision_stderr": 0.005443725071980991, "rouge2_recall": 0.2408430767207643, "rouge2_recall_stderr": 0.007813135912300767, "rougeL_fmeasure": 0.17550095028177032, "rougeL_fmeasure_stderr": 0.00577802693447086, "rougeL_precision": 0.1830299827679806, "rougeL_precision_stderr": 0.006160676873355797, "rougeL_recall": 0.36225372579005893, "rougeL_recall_stderr": 0.008159488239081773, "rougeLsum_fmeasure": 0.17499439612022724, "rougeLsum_fmeasure_stderr": 0.005797120916813952, "rougeLsum_precision": 0.18472309686765498, "rougeLsum_precision_stderr": 0.006223758103180095, "rougeLsum_recall": 0.3537491479629035, "rougeLsum_recall_stderr": 0.00808717995340113}, "choose the most appropriate solution": {"acc": 0.5021762785636561, "acc_norm": 0.5021762785636561, "acc_norm_stderr": 0.011665713661738877, "acc_stderr": 0.011665713661738877}, "no prompt needed": {"bleu": 0.13588134778013236, "bleu_stderr": 0.015459519670994496, "rouge1_fmeasure": 0.04901346680697841, "rouge1_fmeasure_stderr": 0.002055865923271878, "rouge1_precision": 0.05300894951369402, "rouge1_precision_stderr": 0.0027301529015735723, "rouge1_recall": 0.1061507145383213, "rouge1_recall_stderr": 0.0032149427593071117, "rouge2_fmeasure": 0.007982487413659915, "rouge2_fmeasure_stderr": 0.000712887056916324, "rouge2_precision": 0.008494513980757509, "rouge2_precision_stderr": 0.0008377205870598726, "rouge2_recall": 0.015925496135927367, "rouge2_recall_stderr": 0.0011332549301939603, "rougeL_fmeasure": 0.04302281186011969, "rougeL_fmeasure_stderr": 0.0018156151734925613, "rougeL_precision": 0.0457742343119478, "rougeL_precision_stderr": 0.002346181762140086, "rougeL_recall": 0.09699249115638213, "rougeL_recall_stderr": 0.003006081235657284, "rougeLsum_fmeasure": 0.042082706048624005, "rougeLsum_fmeasure_stderr": 0.0018277105669673647, "rougeLsum_precision": 0.04604314004112248, "rougeLsum_precision_stderr": 0.002446501769385043, "rougeLsum_recall": 0.090627933605973, "rougeLsum_recall_stderr": 0.002845023971018631}, "pick_correct_choice_index": {"acc": 0.5048966267682263, "acc_norm": 0.5048966267682263, "acc_norm_stderr": 0.01166526473007814, "acc_stderr": 0.01166526473007814}, "what_is_the_correct_ending": {"acc": 0.5206746463547334, "acc_norm": 0.5255712731229597, "acc_norm_stderr": 0.011650557844573577, "acc_stderr": 0.011655846995729705}}, "2": {"Correct the solution": {"bleu": 8.796771634022612, "bleu_stderr": 0.3911316659754168, "rouge1_fmeasure": 0.3228173852707755, "rouge1_fmeasure_stderr": 0.007403843294444907, "rouge1_precision": 0.3383306969374818, "rouge1_precision_stderr": 0.00792879108703781, "rouge1_recall": 0.4846166240136867, "rouge1_recall_stderr": 0.00804262853108372, "rouge2_fmeasure": 0.22646917848202447, "rouge2_fmeasure_stderr": 0.00698616733256225, "rouge2_precision": 0.2300011447301482, "rouge2_precision_stderr": 0.007377109399709124, "rouge2_recall": 0.3444986994568273, "rouge2_recall_stderr": 0.00824995631557637, "rougeL_fmeasure": 0.31247494161086015, "rougeL_fmeasure_stderr": 0.0074228242548635155, "rougeL_precision": 0.3246607866009956, "rougeL_precision_stderr": 0.007877219184249775, "rougeL_recall": 0.47418408456836464, "rougeL_recall_stderr": 0.008158139222284562, "rougeLsum_fmeasure": 0.312822879065026, "rougeLsum_fmeasure_stderr": 0.007434259241066784, "rougeLsum_precision": 0.32707982563239135, "rougeLsum_precision_stderr": 0.007911882392548962, "rougeLsum_recall": 0.46874851911407, "rougeLsum_recall_stderr": 0.008123276443221847}, "choose the most appropriate solution": {"acc": 0.5119695321001088, "acc_norm": 0.5119695321001088, "acc_norm_stderr": 0.011662480968070068, "acc_stderr": 0.011662480968070068}, "no prompt needed": {"bleu": 0.2131390370421847, "bleu_stderr": 0.034066817734553397, "rouge1_fmeasure": 0.03799621801376054, "rouge1_fmeasure_stderr": 0.0019833229325767424, "rouge1_precision": 0.04766490997973889, "rouge1_precision_stderr": 0.0027505872541556155, "rouge1_recall": 0.056720871691876285, "rouge1_recall_stderr": 0.002671280465988878, "rouge2_fmeasure": 0.0063412101839208194, "rouge2_fmeasure_stderr": 0.0006568616041856655, "rouge2_precision": 0.007753824889172869, "rouge2_precision_stderr": 0.0009180726582653088, "rouge2_recall": 0.009676078489313466, "rouge2_recall_stderr": 0.0009719598996118394, "rougeL_fmeasure": 0.03271614089031179, "rougeL_fmeasure_stderr": 0.0017135896610323722, "rougeL_precision": 0.04049213061658682, "rougeL_precision_stderr": 0.0023256155896140274, "rougeL_recall": 0.05059606948650782, "rougeL_recall_stderr": 0.0024394702454846825, "rougeLsum_fmeasure": 0.03282280188970894, "rougeLsum_fmeasure_stderr": 0.0017310021914839983, "rougeLsum_precision": 0.041656568079500036, "rougeLsum_precision_stderr": 0.0024499403689278464, "rougeLsum_recall": 0.04892348784841611, "rougeLsum_recall_stderr": 0.002356085346485099}, "pick_correct_choice_index": {"acc": 0.5027203482045702, "acc_norm": 0.5027203482045702, "acc_norm_stderr": 0.011665651503000718, "acc_stderr": 0.011665651503000718}, "what_is_the_correct_ending": {"acc": 0.5201305767138193, "acc_norm": 0.5206746463547334, "acc_norm_stderr": 0.011655846995729705, "acc_stderr": 0.011656365410780373}}, "3": {"Correct the solution": {"bleu": 10.92470649416081, "bleu_stderr": 0.34911211757447225, "rouge1_fmeasure": 0.38818722809693845, "rouge1_fmeasure_stderr": 0.00771433388675621, "rouge1_precision": 0.4066260252913565, "rouge1_precision_stderr": 0.008231212467132855, "rouge1_recall": 0.5342107425741864, "rouge1_recall_stderr": 0.007869279508511906, "rouge2_fmeasure": 0.2818434589400104, "rouge2_fmeasure_stderr": 0.007413903409954248, "rouge2_precision": 0.2880242729653989, "rouge2_precision_stderr": 0.007821913844444021, "rouge2_recall": 0.3925434030174727, "rouge2_recall_stderr": 0.008258713922522358, "rougeL_fmeasure": 0.37744167318989674, "rougeL_fmeasure_stderr": 0.007752920486516649, "rougeL_precision": 0.39159852574062887, "rougeL_precision_stderr": 0.008194163001101947, "rougeL_recall": 0.5241346096429227, "rougeL_recall_stderr": 0.007997099796142482, "rougeLsum_fmeasure": 0.3780280451942055, "rougeLsum_fmeasure_stderr": 0.00775661536455512, "rougeLsum_precision": 0.39413857987963213, "rougeLsum_precision_stderr": 0.008220017403685996, "rougeLsum_recall": 0.5206460681141725, "rougeLsum_recall_stderr": 0.007963384607750488}, "choose the most appropriate solution": {"acc": 0.5032644178454843, "acc_norm": 0.5032644178454843, "acc_norm_stderr": 0.01166557553076037, "acc_stderr": 0.01166557553076037}, "no prompt needed": {"bleu": 0.22997720223598342, "bleu_stderr": 0.02338425872757522, "rouge1_fmeasure": 0.03710567348980287, "rouge1_fmeasure_stderr": 0.0020180005367721313, "rouge1_precision": 0.05031505431843015, "rouge1_precision_stderr": 0.0029584770445463347, "rouge1_recall": 0.04384933797682676, "rouge1_recall_stderr": 0.002438097616145567, "rouge2_fmeasure": 0.006082833337651904, "rouge2_fmeasure_stderr": 0.0006978818850891022, "rouge2_precision": 0.008144343048337636, "rouge2_precision_stderr": 0.0009881296494107798, "rouge2_recall": 0.0074638698626018, "rouge2_recall_stderr": 0.0009315826019915539, "rougeL_fmeasure": 0.032270439068929166, "rougeL_fmeasure_stderr": 0.0017622474773529432, "rougeL_precision": 0.043579152184238995, "rougeL_precision_stderr": 0.0025694327719803156, "rougeL_recall": 0.03882153127320646, "rougeL_recall_stderr": 0.002203760453572274, "rougeLsum_fmeasure": 0.0327651263489432, "rougeLsum_fmeasure_stderr": 0.001788852779448503, "rougeLsum_precision": 0.04474778358521563, "rougeLsum_precision_stderr": 0.002654591770374059, "rougeLsum_recall": 0.03883827376493022, "rougeLsum_recall_stderr": 0.0022045086521405998}, "pick_correct_choice_index": {"acc": 0.5108813928182807, "acc_norm": 0.5108813928182807, "acc_norm_stderr": 0.011663061261117758, "acc_stderr": 0.011663061261117758}, "what_is_the_correct_ending": {"acc": 0.5223068552774756, "acc_norm": 0.5266594124047879, "acc_norm_stderr": 0.01164922999434739, "acc_stderr": 0.011654208652596471}}, "4": {"Correct the solution": {"bleu": 12.858154272179098, "bleu_stderr": 0.7278916577243625, "rouge1_fmeasure": 0.4351861616924822, "rouge1_fmeasure_stderr": 0.007752659407235736, "rouge1_precision": 0.4624335317026793, "rouge1_precision_stderr": 0.00826557767550841, "rouge1_recall": 0.563714294146611, "rouge1_recall_stderr": 0.007612246963891272, "rouge2_fmeasure": 0.32028893653738877, "rouge2_fmeasure_stderr": 0.007613634129323394, "rouge2_precision": 0.33238154342689963, "rouge2_precision_stderr": 0.008074274590958037, "rouge2_recall": 0.4191511130266625, "rouge2_recall_stderr": 0.008174601438156475, "rougeL_fmeasure": 0.423598898334552, "rougeL_fmeasure_stderr": 0.00783537952283608, "rougeL_precision": 0.44619870598373307, "rougeL_precision_stderr": 0.008266117528340293, "rougeL_recall": 0.5526110549048481, "rougeL_recall_stderr": 0.0077829782817278575, "rougeLsum_fmeasure": 0.4248676289113619, "rougeLsum_fmeasure_stderr": 0.007827628127897685, "rougeLsum_precision": 0.4499232867985444, "rougeLsum_precision_stderr": 0.008282275077255348, "rougeLsum_recall": 0.5499968165738798, "rougeLsum_recall_stderr": 0.007744452222402591}, "choose the most appropriate solution": {"acc": 0.5038084874863983, "acc_norm": 0.5038084874863983, "acc_norm_stderr": 0.0116654857447468, "acc_stderr": 0.0116654857447468}, "no prompt needed": {"bleu": 0.2712297654901753, "bleu_stderr": 0.053042750349294135, "rouge1_fmeasure": 0.04059849587408926, "rouge1_fmeasure_stderr": 0.002182568730131035, "rouge1_precision": 0.0542226515580773, "rouge1_precision_stderr": 0.0030939110930176925, "rouge1_recall": 0.046451743768737, "rouge1_recall_stderr": 0.0025439882698671474, "rouge2_fmeasure": 0.0070696546789010785, "rouge2_fmeasure_stderr": 0.0007767095517004894, "rouge2_precision": 0.008974181628774633, "rouge2_precision_stderr": 0.0009729364560951758, "rouge2_recall": 0.00866884991169637, "rouge2_recall_stderr": 0.0010298253089854248, "rougeL_fmeasure": 0.03526961790871762, "rougeL_fmeasure_stderr": 0.0019038989493982874, "rougeL_precision": 0.04715725401216418, "rougeL_precision_stderr": 0.0027156375130040103, "rougeL_recall": 0.040955294760240304, "rougeL_recall_stderr": 0.002277246735797152, "rougeLsum_fmeasure": 0.03562639655601933, "rougeLsum_fmeasure_stderr": 0.0019216190005916407, "rougeLsum_precision": 0.048027622411834725, "rougeLsum_precision_stderr": 0.002785556905287026, "rougeLsum_recall": 0.040990146321799474, "rougeLsum_recall_stderr": 0.002275559958385726}, "pick_correct_choice_index": {"acc": 0.5087051142546246, "acc_norm": 0.5087051142546246, "acc_norm_stderr": 0.011664055982032842, "acc_stderr": 0.011664055982032842}, "what_is_the_correct_ending": {"acc": 0.529923830250272, "acc_norm": 0.529379760609358, "acc_norm_stderr": 0.011645667565050864, "acc_stderr": 0.011644913435420155}}, "5": {"Correct the solution": {"bleu": 14.060734984477335, "bleu_stderr": 0.3178125511335596, "rouge1_fmeasure": 0.46127098559948143, "rouge1_fmeasure_stderr": 0.007762781204476088, "rouge1_precision": 0.48801414741034715, "rouge1_precision_stderr": 0.008239688228502035, "rouge1_recall": 0.5852280389043574, "rouge1_recall_stderr": 0.007442570769433745, "rouge2_fmeasure": 0.3436917448426498, "rouge2_fmeasure_stderr": 0.007674129864826368, "rouge2_precision": 0.35639429216000035, "rouge2_precision_stderr": 0.0080903487818709, "rouge2_recall": 0.4396664484838038, "rouge2_recall_stderr": 0.008127010865714702, "rougeL_fmeasure": 0.45010094870717293, "rougeL_fmeasure_stderr": 0.007834754180971275, "rougeL_precision": 0.4725439297415101, "rougeL_precision_stderr": 0.008240735797918716, "rougeL_recall": 0.5746878552043578, "rougeL_recall_stderr": 0.007604121138500754, "rougeLsum_fmeasure": 0.4513631978392974, "rougeLsum_fmeasure_stderr": 0.007835514039656419, "rougeLsum_precision": 0.47616423957106324, "rougeLsum_precision_stderr": 0.008273908357739463, "rougeLsum_recall": 0.5724908344656849, "rougeLsum_recall_stderr": 0.007578130144250922}, "choose the most appropriate solution": {"acc": 0.5016322089227421, "acc_norm": 0.5016322089227421, "acc_norm_stderr": 0.011665762007194868, "acc_stderr": 0.011665762007194868}, "no prompt needed": {"bleu": 0.1498156695877611, "bleu_stderr": 0.028223046286397702, "rouge1_fmeasure": 0.042735679103126284, "rouge1_fmeasure_stderr": 0.002181587233057925, "rouge1_precision": 0.05946021488215291, "rouge1_precision_stderr": 0.00327624600239411, "rouge1_recall": 0.04615385829211834, "rouge1_recall_stderr": 0.0024475462608166006, "rouge2_fmeasure": 0.006029826803091018, "rouge2_fmeasure_stderr": 0.0007150551789811505, "rouge2_precision": 0.0075902239127463664, "rouge2_precision_stderr": 0.0008916116262057205, "rouge2_recall": 0.007204549919294013, "rouge2_recall_stderr": 0.000911423031900326, "rougeL_fmeasure": 0.03698020550413456, "rougeL_fmeasure_stderr": 0.0018951966460887433, "rougeL_precision": 0.05132083543992989, "rougeL_precision_stderr": 0.0028482950530335353, "rougeL_recall": 0.04072766325284397, "rougeL_recall_stderr": 0.002204434423881122, "rougeLsum_fmeasure": 0.03752446063060584, "rougeLsum_fmeasure_stderr": 0.0019205547729290852, "rougeLsum_precision": 0.05260647328921363, "rougeLsum_precision_stderr": 0.00295071182859548, "rougeLsum_recall": 0.04093574793020607, "rougeLsum_recall_stderr": 0.0022066554152275607}, "pick_correct_choice_index": {"acc": 0.5032644178454843, "acc_norm": 0.5032644178454843, "acc_norm_stderr": 0.011665575530760367, "acc_stderr": 0.011665575530760367}, "what_is_the_correct_ending": {"acc": 0.5272034820457019, "acc_norm": 0.5250272034820457, "acc_norm_stderr": 0.011651200967623707, "acc_stderr": 0.011648545262429024}}}, "sciq": {"0": {"Direct Question": {"acc": 0.703, "acc_norm": 0.6, "acc_norm_stderr": 0.015499685165842596, "acc_stderr": 0.014456832294801103}, "Direct Question (Closed Book)": {"acc": 0.397, "acc_norm": 0.377, "acc_norm_stderr": 0.01533317012577986, "acc_stderr": 0.015480007449307996}, "Multiple Choice": {"acc": 0.486, "acc_norm": 0.414, "acc_norm_stderr": 0.015583544104177519, "acc_stderr": 0.015813097547730987}, "Multiple Choice (Closed Book)": {"acc": 0.373, "acc_norm": 0.34, "acc_norm_stderr": 0.014987482264363937, "acc_stderr": 0.015300493622922814}, "Multiple Choice Question First": {"acc": 0.508, "acc_norm": 0.408, "acc_norm_stderr": 0.015549205052920673, "acc_stderr": 0.015817274929209004}}, "1": {"Direct Question": {"acc": 0.723, "acc_norm": 0.666, "acc_norm_stderr": 0.014922019523732965, "acc_stderr": 0.014158794845306265}, "Direct Question (Closed Book)": {"acc": 0.415, "acc_norm": 0.398, "acc_norm_stderr": 0.015486634102858922, "acc_stderr": 0.015589035185604628}, "Multiple Choice": {"acc": 0.427, "acc_norm": 0.436, "acc_norm_stderr": 0.015689173023144067, "acc_stderr": 0.015649789644462224}, "Multiple Choice (Closed Book)": {"acc": 0.376, "acc_norm": 0.391, "acc_norm_stderr": 0.015438826294681783, "acc_stderr": 0.015325105508898129}, "Multiple Choice Question First": {"acc": 0.411, "acc_norm": 0.423, "acc_norm_stderr": 0.01563058909047635, "acc_stderr": 0.015566673418599278}}, "2": {"Direct Question": {"acc": 0.721, "acc_norm": 0.686, "acc_norm_stderr": 0.014683991951087974, "acc_stderr": 0.014190150117612037}, "Direct Question (Closed Book)": {"acc": 0.405, "acc_norm": 0.397, "acc_norm_stderr": 0.015480007449307994, "acc_stderr": 0.015531136990453049}, "Multiple Choice": {"acc": 0.401, "acc_norm": 0.389, "acc_norm_stderr": 0.015424555647308493, "acc_stderr": 0.015506109745498325}, "Multiple Choice (Closed Book)": {"acc": 0.36, "acc_norm": 0.351, "acc_norm_stderr": 0.015100563798316403, "acc_stderr": 0.015186527932040117}, "Multiple Choice Question First": {"acc": 0.366, "acc_norm": 0.379, "acc_norm_stderr": 0.01534909100222535, "acc_stderr": 0.015240612726405754}}, "3": {"Direct Question": {"acc": 0.719, "acc_norm": 0.692, "acc_norm_stderr": 0.014606483127342758, "acc_stderr": 0.014221154708434944}, "Direct Question (Closed Book)": {"acc": 0.396, "acc_norm": 0.392, "acc_norm_stderr": 0.015445859463771297, "acc_stderr": 0.015473313265859406}, "Multiple Choice": {"acc": 0.385, "acc_norm": 0.391, "acc_norm_stderr": 0.015438826294681783, "acc_stderr": 0.015395194445410808}, "Multiple Choice (Closed Book)": {"acc": 0.325, "acc_norm": 0.345, "acc_norm_stderr": 0.015039986742055237, "acc_stderr": 0.014818724459095524}, "Multiple Choice Question First": {"acc": 0.368, "acc_norm": 0.376, "acc_norm_stderr": 0.01532510550889813, "acc_stderr": 0.0152580735615218}}, "4": {"Direct Question": {"acc": 0.723, "acc_norm": 0.701, "acc_norm_stderr": 0.014484778521220465, "acc_stderr": 0.014158794845306265}, "Direct Question (Closed Book)": {"acc": 0.383, "acc_norm": 0.392, "acc_norm_stderr": 0.015445859463771295, "acc_stderr": 0.015380102325652721}, "Multiple Choice": {"acc": 0.376, "acc_norm": 0.378, "acc_norm_stderr": 0.015341165254026644, "acc_stderr": 0.015325105508898125}, "Multiple Choice (Closed Book)": {"acc": 0.341, "acc_norm": 0.343, "acc_norm_stderr": 0.015019206922356951, "acc_stderr": 0.014998131348402707}, "Multiple Choice Question First": {"acc": 0.37, "acc_norm": 0.371, "acc_norm_stderr": 0.015283736211823187, "acc_stderr": 0.015275252316519362}}, "5": {"Direct Question": {"acc": 0.718, "acc_norm": 0.703, "acc_norm_stderr": 0.014456832294801103, "acc_stderr": 0.014236526215291354}, "Direct Question (Closed Book)": {"acc": 0.388, "acc_norm": 0.379, "acc_norm_stderr": 0.01534909100222535, "acc_stderr": 0.01541731797991108}, "Multiple Choice": {"acc": 0.355, "acc_norm": 0.374, "acc_norm_stderr": 0.01530876736900637, "acc_stderr": 0.015139491543780532}, "Multiple Choice (Closed Book)": {"acc": 0.332, "acc_norm": 0.328, "acc_norm_stderr": 0.014853842487270334, "acc_stderr": 0.014899597242811473}, "Multiple Choice Question First": {"acc": 0.361, "acc_norm": 0.382, "acc_norm_stderr": 0.015372453034968522, "acc_stderr": 0.015195720118175115}}}, "story_cloze_2016": {"0": {"Answer Given options": {"acc": 0.5018706574024586, "acc_norm": 0.5093532870122929, "acc_norm_stderr": 0.011560409019420367, "acc_stderr": 0.011562351329083268}, "Choose Story Ending": {"acc": 0.5093532870122929, "acc_norm": 0.5200427578834848, "acc_norm_stderr": 0.01155313897796101, "acc_stderr": 0.011560409019420364}, "Novel Correct Ending": {"acc": 0.504008551576697, "acc_norm": 0.5114911811865313, "acc_norm_stderr": 0.011559378273599123, "acc_stderr": 0.011562060664045736}, "Story Continuation and Options": {"acc": 0.5109567076429716, "acc_norm": 0.5232495991448424, "acc_norm_stderr": 0.01154992548392746, "acc_stderr": 0.011559655791130734}}, "1": {"Answer Given options": {"acc": 0.4922501336183859, "acc_norm": 0.5109567076429716, "acc_norm_stderr": 0.011559655791130729, "acc_stderr": 0.011561043278863545}, "Choose Story Ending": {"acc": 0.49706039551042225, "acc_norm": 0.5093532870122929, "acc_norm_stderr": 0.011560409019420369, "acc_stderr": 0.011562232421541946}, "Novel Correct Ending": {"acc": 0.4906467129877071, "acc_norm": 0.4991982896846606, "acc_norm_stderr": 0.011562417388300193, "acc_stderr": 0.011560409019420369}, "Story Continuation and Options": {"acc": 0.4917156600748263, "acc_norm": 0.5098877605558525, "acc_norm_stderr": 0.011560171163157391, "acc_stderr": 0.01156084507652571}}, "2": {"Answer Given options": {"acc": 0.4794227685729556, "acc_norm": 0.49545697487974344, "acc_norm_stderr": 0.011561954965856519, "acc_stderr": 0.011552636515221858}, "Choose Story Ending": {"acc": 0.4863709246392304, "acc_norm": 0.4975948690539818, "acc_norm_stderr": 0.011562298481438055, "acc_stderr": 0.011558135970599896}, "Novel Correct Ending": {"acc": 0.4719401389631213, "acc_norm": 0.4949225013361839, "acc_norm_stderr": 0.01156183605423878, "acc_stderr": 0.011544210396951663}, "Story Continuation and Options": {"acc": 0.48743987172634956, "acc_norm": 0.4917156600748263, "acc_norm_stderr": 0.01156084507652571, "acc_stderr": 0.01155878357073797}}, "3": {"Answer Given options": {"acc": 0.4836985569214324, "acc_norm": 0.5056119722073757, "acc_norm_stderr": 0.011561703928784337, "acc_stderr": 0.011556285484521561}, "Choose Story Ending": {"acc": 0.4922501336183859, "acc_norm": 0.4997327632282202, "acc_norm_stderr": 0.011562430600098489, "acc_stderr": 0.011561043278863545}, "Novel Correct Ending": {"acc": 0.4820951362907536, "acc_norm": 0.49599144842330306, "acc_norm_stderr": 0.011562060664045727, "acc_stderr": 0.011555016408505476}, "Story Continuation and Options": {"acc": 0.481560662747194, "acc_norm": 0.49812934259754144, "acc_norm_stderr": 0.011562351329083266, "acc_stderr": 0.011554566910658105}}, "4": {"Answer Given options": {"acc": 0.48476750400855156, "acc_norm": 0.49812934259754144, "acc_norm_stderr": 0.011562351329083266, "acc_stderr": 0.011557065368348291}, "Choose Story Ending": {"acc": 0.4778193479422769, "acc_norm": 0.49706039551042225, "acc_norm_stderr": 0.011562232421541946, "acc_stderr": 0.011551049647290307}, "Novel Correct Ending": {"acc": 0.4820951362907536, "acc_norm": 0.48690539818278994, "acc_norm_stderr": 0.011558466383367178, "acc_stderr": 0.011555016408505476}, "Story Continuation and Options": {"acc": 0.47888829502939606, "acc_norm": 0.4911811865312667, "acc_norm_stderr": 0.011560633656952963, "acc_stderr": 0.011552120807053817}}, "5": {"Answer Given options": {"acc": 0.47888829502939606, "acc_norm": 0.4938535542490647, "acc_norm_stderr": 0.011561558589040751, "acc_stderr": 0.011552120807053815}, "Choose Story Ending": {"acc": 0.4735435595938001, "acc_norm": 0.49331908070550506, "acc_norm_stderr": 0.011561400034509398, "acc_stderr": 0.011546234813777409}, "Novel Correct Ending": {"acc": 0.4730090860502405, "acc_norm": 0.4879743452699091, "acc_norm_stderr": 0.011559087533800692, "acc_stderr": 0.011545573278697235}, "Story Continuation and Options": {"acc": 0.47033671833244256, "acc_norm": 0.48957776590058794, "acc_norm_stderr": 0.011559920087347773, "acc_stderr": 0.01154206650976701}}}, "superglue_rte": {"0": {"GPT-3 style": {"acc": 0.5379061371841155, "acc_norm": 0.48375451263537905, "acc_norm_stderr": 0.030080573208738064, "acc_stderr": 0.030009848912529113}, "MNLI crowdsource": {"acc": 0.5270758122743683, "acc_norm": 0.5270758122743683, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.030052303463143706}, "does it follow that": {"acc": 0.516245487364621, "acc_norm": 0.5270758122743683, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.030080573208738064}, "guaranteed true": {"acc": 0.5342960288808665, "acc_norm": 0.5270758122743683, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.030025579819366422}, "should assume": {"acc": 0.5306859205776173, "acc_norm": 0.5270758122743683, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.03003973059219781}}, "1": {"GPT-3 style": {"acc": 0.48375451263537905, "acc_norm": 0.49097472924187724, "acc_norm_stderr": 0.030091559826331334, "acc_stderr": 0.030080573208738064}, "MNLI crowdsource": {"acc": 0.48375451263537905, "acc_norm": 0.48014440433212996, "acc_norm_stderr": 0.030072723167317194, "acc_stderr": 0.030080573208738064}, "does it follow that": {"acc": 0.4981949458483754, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976626, "acc_stderr": 0.030096267148976633}, "guaranteed true": {"acc": 0.48014440433212996, "acc_norm": 0.48736462093862815, "acc_norm_stderr": 0.030086851767188564, "acc_stderr": 0.030072723167317194}, "should assume": {"acc": 0.4729241877256318, "acc_norm": 0.48014440433212996, "acc_norm_stderr": 0.0300727231673172, "acc_stderr": 0.030052303463143706}}, "2": {"GPT-3 style": {"acc": 0.5523465703971119, "acc_norm": 0.555956678700361, "acc_norm_stderr": 0.029907396333795994, "acc_stderr": 0.02993107036293953}, "MNLI crowdsource": {"acc": 0.5018050541516246, "acc_norm": 0.5018050541516246, "acc_norm_stderr": 0.030096267148976633, "acc_stderr": 0.030096267148976633}, "does it follow that": {"acc": 0.5054151624548736, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976626, "acc_stderr": 0.030094698123239966}, "guaranteed true": {"acc": 0.49458483754512633, "acc_norm": 0.49458483754512633, "acc_norm_stderr": 0.030094698123239966, "acc_stderr": 0.030094698123239966}, "should assume": {"acc": 0.49458483754512633, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976626, "acc_stderr": 0.030094698123239966}}, "3": {"GPT-3 style": {"acc": 0.5054151624548736, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976633, "acc_stderr": 0.030094698123239966}, "MNLI crowdsource": {"acc": 0.4981949458483754, "acc_norm": 0.4729241877256318, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.030096267148976633}, "does it follow that": {"acc": 0.48736462093862815, "acc_norm": 0.5090252707581228, "acc_norm_stderr": 0.030091559826331334, "acc_stderr": 0.030086851767188564}, "guaranteed true": {"acc": 0.4981949458483754, "acc_norm": 0.48014440433212996, "acc_norm_stderr": 0.0300727231673172, "acc_stderr": 0.030096267148976626}, "should assume": {"acc": 0.48375451263537905, "acc_norm": 0.48736462093862815, "acc_norm_stderr": 0.030086851767188564, "acc_stderr": 0.030080573208738064}}, "4": {"GPT-3 style": {"acc": 0.5270758122743683, "acc_norm": 0.5018050541516246, "acc_norm_stderr": 0.030096267148976633, "acc_stderr": 0.030052303463143706}, "MNLI crowdsource": {"acc": 0.49458483754512633, "acc_norm": 0.51985559566787, "acc_norm_stderr": 0.030072723167317184, "acc_stderr": 0.030094698123239966}, "does it follow that": {"acc": 0.5090252707581228, "acc_norm": 0.51985559566787, "acc_norm_stderr": 0.030072723167317184, "acc_stderr": 0.030091559826331334}, "guaranteed true": {"acc": 0.47653429602888087, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976633, "acc_stderr": 0.03006330041190266}, "should assume": {"acc": 0.4729241877256318, "acc_norm": 0.48375451263537905, "acc_norm_stderr": 0.030080573208738064, "acc_stderr": 0.030052303463143706}}, "5": {"GPT-3 style": {"acc": 0.48375451263537905, "acc_norm": 0.4729241877256318, "acc_norm_stderr": 0.030052303463143706, "acc_stderr": 0.030080573208738064}, "MNLI crowdsource": {"acc": 0.49458483754512633, "acc_norm": 0.4981949458483754, "acc_norm_stderr": 0.030096267148976626, "acc_stderr": 0.030094698123239966}, "does it follow that": {"acc": 0.48014440433212996, "acc_norm": 0.48736462093862815, "acc_norm_stderr": 0.030086851767188564, "acc_stderr": 0.0300727231673172}, "guaranteed true": {"acc": 0.49458483754512633, "acc_norm": 0.5054151624548736, "acc_norm_stderr": 0.030094698123239966, "acc_stderr": 0.030094698123239966}, "should assume": {"acc": 0.48736462093862815, "acc_norm": 0.5090252707581228, "acc_norm_stderr": 0.030091559826331334, "acc_stderr": 0.030086851767188564}}}, "winogrande": {"0": {"Replace": {"acc": 0.505130228887135, "acc_norm": 0.494869771112865, "acc_norm_stderr": 0.014051745961790523, "acc_stderr": 0.014051745961790513}, "True or False": {"acc": 0.4964483030781373, "acc_norm": 0.494869771112865, "acc_norm_stderr": 0.014051745961790516, "acc_stderr": 0.01405213114691586}, "does underscore refer to": {"acc": 0.5067087608524072, "acc_norm": 0.49329123914759276, "acc_norm_stderr": 0.014051220692330349, "acc_stderr": 0.014051220692330349}, "stand for": {"acc": 0.5256511444356748, "acc_norm": 0.5138121546961326, "acc_norm_stderr": 0.014047122916440419, "acc_stderr": 0.014033980956108558}, "underscore refer to": {"acc": 0.5130228887134964, "acc_norm": 0.4964483030781373, "acc_norm_stderr": 0.014052131146915857, "acc_stderr": 0.01404771839399767}}, "1": {"Replace": {"acc": 0.5059194948697711, "acc_norm": 0.4972375690607735, "acc_norm_stderr": 0.014052271211616448, "acc_stderr": 0.01405150083848581}, "True or False": {"acc": 0.4925019731649566, "acc_norm": 0.49329123914759276, "acc_norm_stderr": 0.014051220692330349, "acc_stderr": 0.014050905521228577}, "does underscore refer to": {"acc": 0.5169692186266772, "acc_norm": 0.5011838989739542, "acc_norm_stderr": 0.014052446290529015, "acc_stderr": 0.014044390401612976}, "stand for": {"acc": 0.5146014206787688, "acc_norm": 0.500394632991318, "acc_norm_stderr": 0.014052481306049516, "acc_stderr": 0.014046492383275832}, "underscore refer to": {"acc": 0.5027624309392266, "acc_norm": 0.5019731649565904, "acc_norm_stderr": 0.014052376259225636, "acc_stderr": 0.014052271211616441}}, "2": {"Replace": {"acc": 0.4980268350434096, "acc_norm": 0.4964483030781373, "acc_norm_stderr": 0.014052131146915867, "acc_stderr": 0.014052376259225636}, "True or False": {"acc": 0.5027624309392266, "acc_norm": 0.4940805051302289, "acc_norm_stderr": 0.014051500838485807, "acc_stderr": 0.014052271211616441}, "does underscore refer to": {"acc": 0.489344909234412, "acc_norm": 0.4964483030781373, "acc_norm_stderr": 0.014052131146915853, "acc_stderr": 0.0140492945362904}, "stand for": {"acc": 0.5082872928176796, "acc_norm": 0.5027624309392266, "acc_norm_stderr": 0.014052271211616441, "acc_stderr": 0.014050555322824189}, "underscore refer to": {"acc": 0.5011838989739542, "acc_norm": 0.505130228887135, "acc_norm_stderr": 0.014051745961790513, "acc_stderr": 0.014052446290529019}}, "3": {"Replace": {"acc": 0.5090765588003157, "acc_norm": 0.48855564325177586, "acc_norm_stderr": 0.014048804199859322, "acc_stderr": 0.014050170094497707}, "True or False": {"acc": 0.516179952644041, "acc_norm": 0.5011838989739542, "acc_norm_stderr": 0.014052446290529019, "acc_stderr": 0.0140451261309786}, "does underscore refer to": {"acc": 0.500394632991318, "acc_norm": 0.4877663772691397, "acc_norm_stderr": 0.014048278820405616, "acc_stderr": 0.014052481306049516}, "stand for": {"acc": 0.49329123914759276, "acc_norm": 0.4925019731649566, "acc_norm_stderr": 0.014050905521228577, "acc_stderr": 0.014051220692330349}, "underscore refer to": {"acc": 0.500394632991318, "acc_norm": 0.5011838989739542, "acc_norm_stderr": 0.014052446290529015, "acc_stderr": 0.014052481306049516}}, "4": {"Replace": {"acc": 0.5011838989739542, "acc_norm": 0.49013417521704816, "acc_norm_stderr": 0.014049749833367592, "acc_stderr": 0.014052446290529015}, "True or False": {"acc": 0.500394632991318, "acc_norm": 0.4996053670086819, "acc_norm_stderr": 0.014052481306049512, "acc_stderr": 0.014052481306049516}, "does underscore refer to": {"acc": 0.48855564325177586, "acc_norm": 0.49013417521704816, "acc_norm_stderr": 0.014049749833367596, "acc_stderr": 0.014048804199859332}, "stand for": {"acc": 0.5035516969218626, "acc_norm": 0.4988161010260458, "acc_norm_stderr": 0.014052446290529015, "acc_stderr": 0.014052131146915867}, "underscore refer to": {"acc": 0.49013417521704816, "acc_norm": 0.4925019731649566, "acc_norm_stderr": 0.014050905521228573, "acc_stderr": 0.014049749833367596}}, "5": {"Replace": {"acc": 0.5114443567482242, "acc_norm": 0.4925019731649566, "acc_norm_stderr": 0.01405090552122858, "acc_stderr": 0.014048804199859325}, "True or False": {"acc": 0.5011838989739542, "acc_norm": 0.5011838989739542, "acc_norm_stderr": 0.014052446290529012, "acc_stderr": 0.014052446290529019}, "does underscore refer to": {"acc": 0.4964483030781373, "acc_norm": 0.500394632991318, "acc_norm_stderr": 0.014052481306049516, "acc_stderr": 0.014052131146915852}, "stand for": {"acc": 0.5114443567482242, "acc_norm": 0.516179952644041, "acc_norm_stderr": 0.014045126130978601, "acc_stderr": 0.014048804199859329}, "underscore refer to": {"acc": 0.48697711128650356, "acc_norm": 0.4877663772691397, "acc_norm_stderr": 0.01404827882040562, "acc_stderr": 0.014047718393997663}}}} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..049ec5baf2fababd81cf0598b386b1a4d32e36a8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2304122240550486, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02282542594810443 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.060241646379365375, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018281076733804075 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24592443008643936, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00451257736641835 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.088654136660333, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0018667826313676483 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.028611694237188352, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001256141851839684 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.1211577510207888, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0030274510689492543 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.04185850357277026, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011315772112214053 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.059215633929813966, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0017815112040087317 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2428343490294157, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004471374721999319 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08728570338341435, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001811977941125175 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.057472515102994094, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017658517107930081 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2343449114940915, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004180785536759541 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08440847938326362, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017495023319111394 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8bf03ed9d407474f53ba7d1bcec54315c774bf14 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.24093765005017717, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0230030873247297 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06608421860053589, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002336019334031938 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21220436916634114, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004053789065265438 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08727381267513917, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.002019085465483018 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.026958060365485183, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0014325663154050872 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09594460677384929, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002636689924376356 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03690045692783099, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011932344773442297 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.0627130320531275, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0022019609556712485 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20593946270610541, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003942128258116945 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08339382486023829, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018750612270331736 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06314633303729772, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00225899090820857 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20495711011670537, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003873468183707786 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08340965215556247, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001888288916759086 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..47dfdb93f6eeba9828be77a2caddffc4cd80fe1d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.233031239019028, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.018622989143777895 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.0669033461265895, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0022457356623633616 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2210311245370995, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004136964100813092 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.0894179415607547, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0020679188927635165 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02737441536683152, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0012163404235599126 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10228061138551528, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027171296640774193 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03847532772409176, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011774128694059217 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.0627740873189878, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0020419307238977585 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21409346975466498, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004009271610351055 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08484682008716848, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018654847610348828 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06306585978755326, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002078016863885293 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21306042006535458, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003959496879716326 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08486627231660553, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018862109332949393 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1696af36d618daab784669df3b436e650900a1d0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.23948204833652226, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02987129954951102 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06118912078285516, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018874100598924987 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21690381058073044, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004037786222557515 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08516966035997384, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0019653410958603845 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.025691103456522035, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009761929133606694 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09913747741149387, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026775514547381393 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.036941223370484465, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011451903532238477 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.057680973751972266, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001710672833760511 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21034092384649808, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003897565257297013 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08113455126274569, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0017918361111978508 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.058077399704829655, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017441871631557635 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2102632726423538, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003885092717560431 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08135704576559996, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001809777621388229 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4e2f559bc970826d25fa545297fa750ac5d72249 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.23249910495908727, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.029070914247746167 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06371462011124131, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0019856815389907496 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2217109306879308, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003947138405833181 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08752577226961719, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001988775968059262 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.026831790674440324, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001147287521474585 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10130367868890543, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025744699017443726 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03752454514172853, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001129399736645277 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.05966276178114979, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0017451674391315488 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21514059835508997, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038265964816655085 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08315980831393469, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0017913382350644002 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.060216714540077834, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017985198514999731 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21496362989260864, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038038383785956613 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08355071339347263, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018285792853000794 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5dda1f7d3fd22bf7f1063f92ce0977d3cf9da140 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.22081286066426478, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02274325142571731 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06624132704477384, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002139644596424952 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22692456733410507, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004091333470382997 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.09147989131946721, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.002205390270555857 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02768138829423821, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0011951077136256075 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10373515662428917, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027182295029531743 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03903350582230713, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00122131545348737 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.06135390440151855, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001830142897495819 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2187629645915043, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003916988813854803 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0861662328266173, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0019549257073660665 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06154502173900534, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018661465944842495 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21792767487533674, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038849552847416537 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.0861577572212862, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019790861267728415 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c5d1f2d88a559fc9575b2116132342ea1ced9888 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.021212341576732085, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0007573290174140969 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.15394872315339908, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0025907246130094292 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.03406862468978302, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0007291397354522542 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.0007330327285282334, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00013102839162176146 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.0060791270967935854, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005096344336408659 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.0010295704236990917, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 9.29161964969125e-05 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.020816161702046237, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0006635945167961655 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.15359448098622203, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.00258622832573184 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.033775131528634185, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0006877591126027594 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.014879313282812858, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0006265155007215373 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.11208211458026511, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001969123293960845 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.023579045736592925, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005251315581581948 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 0.005621069280208802, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.00010070462717428731 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json new file mode 100644 index 0000000000000000000000000000000000000000..eecfa1f31727fc6d23a1b0025f3858dfc81bef19 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.1725932493840725, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002529386481031639 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.1604306963623193, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002484650213252913 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.14612427698267197, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0018059192168654533 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.018139617142187602, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001178810295076615 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.017838294270011764, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0010680407233547252 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.015048493432535332, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008510426237769514 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.1452889883810266, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0021500071787494027 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.13484344118305408, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0020730900587942985 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.12193728452169696, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014342614701120422 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.1538719868843642, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0022759439069310527 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.1423996572873628, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002170737056924197 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.12940784544932987, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015587985285077882 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 0.7928900478653991, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.13053518516004906 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json new file mode 100644 index 0000000000000000000000000000000000000000..784c3a801c361afdc3e161f2fe84643ed0491960 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.21086754974453098, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.003481087853726294 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.25038766375814814, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004322057797344624 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.1907465620653806, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0025141994758677614 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.046020553876655636, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.002081339235092095 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.06454664623813919, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024885004130050735 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.04229150796312311, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0016292319117673752 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.17652392027199135, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.003026510309888636 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.2138167297173281, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003888991530572478 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.15945985445699104, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.00211755282820529 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.18652878286693028, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.003173835353071881 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.21970003626695772, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003760387533497808 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.16758058695841305, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002234265089238999 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 1.035688482397717, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.07979180056283594 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json new file mode 100644 index 0000000000000000000000000000000000000000..962a9dcd9a8fb79928d180115d92ade988cc38dd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.24945289061580536, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004234785605850604 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.27695089051506727, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004409164518913342 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.2155827425525286, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0028813429865885256 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.06789221743946505, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.002570766391550549 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.08374766031914743, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002770294903947266 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.05870693940495815, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0019495378220205084 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.20844479328303336, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0036153889991536138 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.2362160338231045, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003930516775517815 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.1799141924288802, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0023927111472121488 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.21975278899164405, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0038037794972502884 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.24068798176788908, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037486405001691237 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.18810543014252795, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00247964907125972 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 1.4760612258997623, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.1344204674137375 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a4d2296de65dd9fd910227a2bd75b6a6d2ef6248 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.2843113295309021, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004668202372322337 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.2840533638331491, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004212420357643946 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.23277564344319238, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003033324883659851 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.0855143860050401, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0028864190680247532 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.09058538636969998, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002732539141359347 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.06941913517884618, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0021154296895269154 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.24061329765622666, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0040548049932821436 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.2451732467444115, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003800935008924143 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.1969438842373623, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002591786476274177 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.2513996234316726, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004211644541437699 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.24901463870097312, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036333762169696126 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.20445884584732335, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002660931367820124 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 1.4525772102533077, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.09991573236218197 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a07bf4111d936308794115e3add3915d7af00814 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_explicit-graph-description2_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_precision": 0.3079953699411845, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0052262700820691855 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_recall": 0.2990488948523553, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004288889844947112 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge1_fmeasure": 0.24428760561857069, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0032770994487209613 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_precision": 0.09878776942732348, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.003295722862390014 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_recall": 0.09931014156182742, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00285802935368016 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rouge2_fmeasure": 0.07628545628688663, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0022936944399277277 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_precision": 0.26091647796513057, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004518606930149643 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_recall": 0.25924988114075476, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003916835233501827 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeL_fmeasure": 0.20682176993182852, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002772758722894698 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_precision": 0.2723426426961616, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004751841751952048 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_recall": 0.26102854194007674, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037065483496949393 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "rougeLsum_fmeasure": 0.21367093331703443, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0028579521145505734 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "explicit-graph-description2", + "bleu": 1.5535486678139987, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.12636178897739148 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..973a9151894c03e7b9e538669ad3e5a713901657 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 0.06678968022431767, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.013291302987332878 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.029914442023692294, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013899685699484238 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.17191853554387962, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0027258688235086617 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.04180906676802502, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.000892515049715985 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.002770987452397337, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00026930245436038245 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.018458908430645037, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0012055847911475867 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.0043358346468287505, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003699728130996654 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.028773567135668157, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012807801393214825 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.16962928822415665, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0026788882785198187 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.04069689961336112, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0008129334087216642 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.023429395403589064, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012857906320802946 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.13119208466933271, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0020710838890581453 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.03168737424504007, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0007282033005749001 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e3aca923b47ddcefe86896c02ad2f3058f1ce6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 0.7343903998488392, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.1264580334548859 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.1713694988441509, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0025683178287328103 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.1620927633038664, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0026004642336094302 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.1451954506401401, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0018476593880312048 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.018149248560522465, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0011980307264669568 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.018701683687898776, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0011131166018444143 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.015147654240964752, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008531589600636162 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.1438174575375414, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.002179050759564507 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.13648488466088793, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002207259216754237 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.12085106733704139, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014628495715754606 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.1522841828379344, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002306578353042297 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.14316734345889626, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002243661271505656 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.12811642696509545, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015874918061787038 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..722892219bf147b3b67b0be9da1bb6eec284376f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 0.8950469728314032, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06813864403493262 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.2245739569864698, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00394768209564399 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.2605817818040319, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004397225408663707 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.19598922586079662, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.002726474308866776 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.05842649937474419, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0025228225800757995 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.07857813168613167, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002700572476720445 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.05131564722525416, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0018242084011574487 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.19008397370997976, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.003446693936021226 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.2280794378801942, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004143086530714078 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.16612890671250619, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002321135524749462 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.19875997968955347, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.003614311300045863 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.22698955916927938, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037471810061475252 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.17165207996551565, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0024000508163840063 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..726e207c28f0d760cd66bce079adace53827f6bd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 1.0328999153691525, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.15218614003775394 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.2561698960860146, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004586388937239763 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.2889121821334627, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004530658302121611 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.21578406129162228, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003014899947915645 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.0756534526059316, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0028609493658018873 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.09619008514491978, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0029113141102096165 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.06387420889427227, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0020202475334694754 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.2162947950404591, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.003924794592166209 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.25398159222153943, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004265191984478587 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.1831681982588611, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0025326526566856545 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.22494600706563603, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00410385917148987 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.25165737151714396, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038988712792617612 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.18805417860233245, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0026019608331974494 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..63976ada8c4521edcffd88aa6b28ba1eb848bc27 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 1.1848438797934058, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.10113709897455576 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.29191494117074934, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0048753981466979806 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.2982482419805292, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004362410145983633 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.23507855170285297, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003052960609273728 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.09264180506664611, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0030441647048234763 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.10366534795774687, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002886282274864994 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.07429307318653183, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00210574121245154 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.24944159316145484, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004252142505917989 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.26299891956132837, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004120440439329194 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.20109163102106464, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002579325282186586 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.25743050877782325, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004415034565833493 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.25969769735732884, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003739952618361054 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.2054154975263721, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0026590981957875476 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6c7328b02fd7c4abd416d6b4968ef1a6af29cd20 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_implicit-graph-description_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "bleu": 1.5291942430627856, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.12407405811462165 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_precision": 0.31817552785971276, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.005410918975610136 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_recall": 0.3120825384968243, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004423006280320729 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge1_fmeasure": 0.25074445308374155, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003280258168301159 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_precision": 0.11020791501317337, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0037693744168018546 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_recall": 0.114958415897581, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003095157786907941 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rouge2_fmeasure": 0.08476444821349621, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.002405571742153227 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_precision": 0.2709202785200124, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004714816857530811 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_recall": 0.2753613124108194, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004151926334002586 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeL_fmeasure": 0.21469809323475145, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002784872235569171 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_precision": 0.28086204710118207, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004933792503425808 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_recall": 0.27229001474697245, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003806628830915923 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "implicit-graph-description", + "rougeLsum_fmeasure": 0.21938893610824683, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0028754942341362322 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..942f15be3304e51a2f15ea485d6b41af50def345 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.048154071638123803, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0017394492115037211 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.1603960438769496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002808863528216255 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.054872501275754215, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0011675161117053734 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.0050437692571547055, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00041097690347038956 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.020874718274915008, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00145782883855474 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.00644935707690507, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00048277790299858106 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.044143178163838545, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0015820050437832095 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.14857144649500334, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0026359059490226556 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.05035459707061074, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010577917832862004 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.04435825561436467, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016624296413482802 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.14581597687921308, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0024815309695857325 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.04968291242100845, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0010556937198168178 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 0.06942821394863977, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.00721092856708907 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..719d14a4fbe1bfbb24f95ab4a3d4cd1006737a5a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.16455416189125624, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0025710885946127505 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.2181617130834446, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004232534637085028 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.15006174394417673, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0019223053607641332 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.023103353875520607, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0012429378612023923 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.051688701593469964, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024769333300300395 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.023945131921722818, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001073921369797315 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.13925189436630736, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0021920195516649307 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.19238762602768703, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0040704144827113 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.12712565779418603, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015763640466711865 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.14613811038014662, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0023432542987913535 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.19219266107645588, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036970487118417475 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.13222747966833925, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001685437840262139 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 0.7207771490116592, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.10258109032487232 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..119aefd7185cbb2bdaa313b1cc3c509e63123659 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.1840518032934248, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0033695422988167393 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.32294580089178854, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005293640122083205 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.18077149568804796, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0024357765771144814 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.048339216717895615, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0021060002829076923 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.11482202688362075, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0034238195259001535 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.050683857012723176, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0016201788737892754 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.1584513791779987, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.002944143746092905 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.2930866134885274, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.005175150990392834 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.15676881729864572, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002052846994821611 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.163518547765267, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.003098199168660185 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.28299351899056346, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004588174178628674 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.15894468731506023, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0021537616662293205 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 0.8172765885695147, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.050751327067539105 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..de7f60dd7b758c9c332d3252aac7cc4aa9ac81d9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.21339019477940072, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004136854182002759 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.36172056828283894, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005192684637361719 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.20493909153322515, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.002712950527519467 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.06875941454459712, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0027123104753735013 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.14026102787918196, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0035390177764360073 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.06763126769213473, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0019242155202766913 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.18497245590804035, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0036161696226493217 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.32950137116051886, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.00509054750559144 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.178978835556707, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0023052454882151385 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.18870247477597205, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0037949931460892973 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.31437773260575974, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004468573983043156 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.17901076394504797, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0023948023589502633 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 0.8232200037553864, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0990062331795447 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b928d3161325c9c713c87a72a496388f3f5d5616 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.24073524945767655, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004707514658959234 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.37222777341340585, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005100896049800429 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.2219276041114797, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003043064081835907 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.08343573004395873, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0029901511610586507 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.1518671437094746, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0036105628962266956 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.07863230130745369, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.002070687205307558 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.2082283461845528, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004101899636825203 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.3382840179138503, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.005009409498199744 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.19296499339169593, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0025313363149249185 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.21271916694519305, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00429809264467102 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.32505704280562625, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004417521948317753 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.19372279394640554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0026569278397281402 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 0.9445938185251443, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.09197403951561334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f3064593ee7e7d46de71b20c148318f5144086 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_non-explicit-description_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_precision": 0.26082598406654584, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0051359481387504655 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_recall": 0.3729649784159858, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004970827450327891 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge1_fmeasure": 0.23144425717217612, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0031085174926612394 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_precision": 0.09576839794606369, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0035521748356882993 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_recall": 0.1532169603638125, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003593558920823611 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rouge2_fmeasure": 0.08355597806644839, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.002189706487328105 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_precision": 0.22583373330234002, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004534237669240004 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_recall": 0.3374628358280029, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004847122676128511 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeL_fmeasure": 0.2012929842528386, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002610242315697286 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_precision": 0.23006368905799984, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00470015880813854 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_recall": 0.3255358190359428, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004336770681537931 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "rougeLsum_fmeasure": 0.2020118383660613, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002720953194833289 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "non-explicit-description", + "bleu": 1.0828952105973466, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.08428338494461286 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0b847b8cf66e716f2524e8cf2b0123401606f719 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.09839982038939207, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016034845059760892 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.557046636180574, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004459443954469606 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.15932996809565791, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0021975666324784045 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.03974822393863394, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009259214471201931 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.23724700480115124, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003530724556191307 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.0645288094675153, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0013271654890972852 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.08556676553539082, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012344258273556512 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.5120237901085162, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004389079693859156 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.13994234816561368, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001713279737012679 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.08372529560944662, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001428553657612201 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.47520736805531627, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003928372595420955 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.13548370931568984, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001965481361266326 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.70556499870502, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.07021163423422254 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json new file mode 100644 index 0000000000000000000000000000000000000000..63895f7ba129882a79671c07ca4c20d4286a50f5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.15192244328141744, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0026851175239504146 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.4088076221119547, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.005307206147263785 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.1795535626460401, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0023078723613743383 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.042264113746990004, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001519132737951407 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.15816170460414825, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003648850558319156 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.056216955504673816, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0015622457935091263 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.13222718563169758, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0023137442474583567 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.37772895967872394, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.005250016280580306 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.15827887417485154, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0019506890461920023 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.1331053325871554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0024745486155965113 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.3552035560328879, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004588945170502327 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.15584864004949936, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020620407818320073 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.7666335106831184, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0635865012850457 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7f78c368c97888e39516b09c1c4a07cc65671c4b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.18082568461374282, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.003639459051365589 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.4264948458628402, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004935119334379477 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.1995262410573067, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0024883401069483438 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.06212406576427143, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0023869401244636375 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.17467748933722366, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.003536521311814631 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.07054805798392726, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0016830190110589496 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.15912923447907845, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0032345958330729694 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.39493138391122135, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004915557812492545 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.1770227263859978, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0020868398943825295 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.15978841140185437, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0033957422368174534 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.37244471989166417, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004290031616626638 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.1741607140832361, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0022309499919646618 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.8036503769411473, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.08352144767185858 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json new file mode 100644 index 0000000000000000000000000000000000000000..0147a1a91235f4275f9f5390ed692222ab967981 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.20161323138180814, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00419614102063665 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.4284437489377629, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004747351313475394 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.20998314436048413, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0026958272587435566 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.07075454828433567, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0025432659534373814 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.17982051325515352, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0035838481085783518 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.07655162918992574, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001828909076942692 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.17689528139425809, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0036676387595752816 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.39708608920535615, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004751490642713908 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.1863768106489585, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.002259606335187374 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.1782191584681922, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0038632898410200503 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.37361714826137415, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004151105270083309 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.18356302820432102, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0024163227573974415 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.9213985772786043, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.09261357714373215 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json new file mode 100644 index 0000000000000000000000000000000000000000..9637ebf7790d76750672612d53688bcff2649b3d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.22590230879356132, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004885739171030689 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.4183617818367607, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004646737629595512 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.2203882766391892, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003092911335667608 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.08369577119049203, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0029217645456625545 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.17612573754542205, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0035387797965567075 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.08291524084048744, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001997647183245828 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.19700967528722899, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.00420918433484521 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.3860053232564878, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004602487402565482 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.19451918196713988, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0025549062189435443 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.19774103481256436, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004390815504231839 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.3641519676371665, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004085150565177179 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.1912941284472393, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00269200231433118 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.8669579841221637, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.08180503550292748 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ce03c281d36cfa7e551cb8b1eeee38690184cc1a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-web_nlg_en_very-explicit-description_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_precision": 0.2333099109106658, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.004918167379356697 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_recall": 0.40468387245463144, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004615290964853879 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge1_fmeasure": 0.221868544853033, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.003062043218874345 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_precision": 0.08575749645339896, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0029475165939869268 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_recall": 0.16978922632954452, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00346238061542852 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rouge2_fmeasure": 0.08265916020277501, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.001971954733869372 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_precision": 0.20506533085409256, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.004283642129477899 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_recall": 0.37349518444433055, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004568965644534639 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeL_fmeasure": 0.19660756094703621, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0025597130950611555 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_precision": 0.20613211140017276, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.004444626909561573 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_recall": 0.3539709992804558, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.004068457607282714 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "rougeLsum_fmeasure": 0.19418171975741957, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002689265498940047 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "very-explicit-description", + "bleu": 0.949825770028968, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.07260355364195568 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..359d7be67fe7d586fb1dbf13be91dbb745b20984 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.1975977152505175, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0019210439152713556 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.3571450155403217, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0027159219787335877 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.23668156796351772, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0017834284452177681 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.04679995732393755, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008453000647757492 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.08872427020642143, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.001757359421073499 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.056425445419599346, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009610968832196708 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.13079025220025553, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001163188240810311 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.24818548845368613, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002206255979336452 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.15899265084812766, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001137037008281727 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.18296614395904412, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001771259927022808 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.3318205839417274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0025527759940675494 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.21937178453510964, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0016467565432194853 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 2.526005202191506, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.058273418733684156 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6bafbc50ee9ec0a71414e22304fee750e5d2acfb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.15054373183476955, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001998147829744307 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.22938700618092542, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003041552612274185 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.1625536760242897, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0019424044371032676 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.026747967720227677, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008081761640449396 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.04688223062252883, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.001502981154526947 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.03048470823212383, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008439966461249648 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.11130833909157777, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001381727326473868 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.17251099505438725, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002267024463807693 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.11990114200505439, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012809983090350478 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.14096779097971843, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018582505585400385 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.21419795786755574, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00281418674755583 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.15185020212271533, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017871174330384631 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 1.7313590804890209, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05908907418295776 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ca60611265b08189edeb49856296625bf12a63ed --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.14664845124249912, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0021074927136243747 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.22029309843953365, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002950858143159719 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.15659972337101613, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001906663384031481 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.025832066720273383, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008715375341951669 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.04297038579858274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0014465923870447984 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.028283684834043706, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000821620933532687 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.11186134311345497, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0015665677693472357 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.17054780622395707, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.002242184695689906 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.11924857854706734, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013235141134357415 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.1363376415997164, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0019672230168232964 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.20473556515895666, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002733779107620804 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.14532637028195308, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017544389892467619 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 1.5124106778411996, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06141046632956911 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..ae19a79ac0ffb2f6b0e97e7b67cd60b97a2d4996 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.1237560514705609, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002282039852755364 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.17609270530427382, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0030210209846962413 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.1258659396327009, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001992963699044068 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.020954462249452663, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007965730554829935 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.033657403446598254, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.001329371810945043 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.021957452841427055, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0007360198417813418 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.09658445599443373, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001753215991240076 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.13975525526667698, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0023736809100178683 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.0981193897316493, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014598347738579937 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.11453614366434078, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002105151678823837 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.16338969575022716, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0028019873925272075 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.11650869217135837, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001832893857886487 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 1.3802963074575254, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06732152627137003 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..1a8214f32fc9af1df7a23482c6d2106c66756b67 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.04395368484664138, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018424926390462097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.05774517711149188, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002305158478827759 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.04204648300277925, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016015557898894717 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.00844841454877157, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006603987371609425 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.012177055063554206, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009005039320534978 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.008202499844200142, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005608948554894471 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.035216144797083765, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014914106117576477 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.04621071195954536, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001796318755601515 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.03339087455481403, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012365521644087543 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.04090439759741698, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017246420675129577 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.05360227908215058, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021376470771195598 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.0389854987222177, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001481057274442113 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 0.22377593301174895, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.022223303535369093 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..573ae376a26e32bc4ad0420586b5e762f47bab18 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_article_summary_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_precision": 0.006968711039755044, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0008981875218117217 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_recall": 0.009045805133122158, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0010585291219916539 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge1_fmeasure": 0.00624650026917102, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006896033744358352 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_precision": 0.00147397688199298, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00031382142309567925 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_recall": 0.0023038793256885807, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005038053110111991 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rouge2_fmeasure": 0.001354763281978201, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0002354570603752033 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_precision": 0.005684552101183715, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0007811869359473511 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_recall": 0.0073860095468518735, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0008694725694459097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeL_fmeasure": 0.004998030899560508, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0005484876409153318 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_precision": 0.006504958900325744, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0008576188233041718 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_recall": 0.008412531590010163, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0009903950820560048 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "rougeLsum_fmeasure": 0.005785170257262004, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.000640536066601862 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "article_summary_en", + "bleu": 1.3197166417911808e-08, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 2.839866989657345e-08 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3d4c81434a0444855cfa0815d1ca61988992ad5d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.09557902324303133, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015934233011554208 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.15195533247270118, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002167076868910249 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.10814475341505056, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001524767202301828 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.010021499848992905, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00042254787804722635 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.0175643734135993, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008555885514606594 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.011588030659270141, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004784186900067079 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.08020042698947122, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012460897241632469 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.13117713414918547, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0018429649355817298 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.09167513906886186, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012095215627801854 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.08997576030672756, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014864753127451467 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.1439070530072875, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002057912841397234 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.10198753546847392, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014211405382431829 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 0.6145809763628965, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05797766316597433 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f24856911adcfd4a779d53b06dc16b780c944f38 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.11274337869180198, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015080376244132586 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.10983840856878206, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0014551714150494175 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.0979457603122117, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0011364196004116048 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.004607065202297954, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00029696477370218157 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.004443770455530801, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00030921576747671716 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.003914558098208722, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00023624265329673873 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.09101884007968876, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001195226939453244 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.08894440577554404, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001162325195438461 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.07861266088907197, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0008536301328765664 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.10885553700903176, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014410450568103732 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.10635049542751197, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0014076769390450833 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.09467060993519806, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0010886015566558735 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 0.2408792681226857, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.024130354527193396 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..bd1f289f17c3a914d6ee3c8b4d2755b011f84a9a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.11450203486111951, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015512493201278732 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.11764026218464517, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0017763168006867999 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.10129546926820872, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012376127878624412 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.005990341232345204, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003778566735238687 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.007150893881572016, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005954055858899312 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.005443826668587504, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003144404879778184 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.09176826957181027, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012268770230593445 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.09444076989884408, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014215142609522416 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.08046273244596494, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009128776785211843 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.11011996762454923, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014803727480206098 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.11312057420513341, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0016882892537450165 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.09737556397488158, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011716121889168436 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 0.42527357697495255, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06953287436188474 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..04beaf84e1a5383646a6f397569725b059a28b28 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.10053229311714834, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018186300913397432 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.10555528455488752, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0020869634882788126 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.08745924967780058, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.00145734596535055 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.007244189708893669, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00047064519532168336 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.009481343391595305, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0007029880713328167 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.0066746059699027156, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000406229568602048 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.08113908472999296, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014422772176468318 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.0854911553393767, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001688161117709379 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.06994035855230117, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011004649845343271 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.09576776212352223, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017136097429588115 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.10032211171110292, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001963048810667937 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.08314051391123733, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001363454598942612 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 0.5943947138668615, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04701176872890474 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a61c63cf8b872b7168f2cee7e7aa651b5e3d6232 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.035781196323287794, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015508373183201957 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.03889017824317372, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0017168088677622815 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.03034948263830278, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012288540754404823 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.00393091834595338, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004260496428881099 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.0052037277375677786, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000540702114853635 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.003519391416781204, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003344402761329188 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.02946476725548691, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012750099020740124 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.03200016757522908, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0013894967755151602 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.02470528087251438, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009707901157133207 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.03393706130302841, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014660077905268025 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.03664599115059629, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0016081833996057495 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.028653137000749736, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011499184993932895 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 0.06155277062691283, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.008573159858072549 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..204815d72aa6aee4ea90c1e1d021a3b1fa5f8875 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_rephrase_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_precision": 0.005466518578601791, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0006360451019998415 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_recall": 0.005849281181457089, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00074547507260093 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge1_fmeasure": 0.004552330658915172, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0005166062646006778 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_precision": 0.0007290381746496603, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00017635675582582084 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_recall": 0.0008879185087143063, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0002685233025455508 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rouge2_fmeasure": 0.0005748508715984116, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0001533404190150482 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_precision": 0.004641407616449875, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0005531411811140234 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_recall": 0.004667322692384839, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0005697941762332262 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeL_fmeasure": 0.0037163176139750516, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0004064756060554951 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_precision": 0.005301656522138449, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0006174490006150588 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_recall": 0.005552151354486044, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0006921370486199647 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "rougeLsum_fmeasure": 0.00434986186087182, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00048298225975487925 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "rephrase_en", + "bleu": 9.384428445264235e-13, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 2.048496076777406e-11 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8d04dbe80a2e11448cbcb3f52f58a6a4dbd89f73 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.06083058515622332, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015146719263047607 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.08130079272174061, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0017565791128464651 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.06398795450213274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014079644213715252 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.0050127471219516785, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00033788530642644006 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.006932802618765814, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00047568921018848896 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.005239039550783603, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003252538524207895 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.056280510609653965, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013608121285239516 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.07622653073256713, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016225806985423702 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.05958527780779474, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012805893801553889 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.05661921621032954, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001398882606899158 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.0762057149480866, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00164425094307365 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.05971017630902041, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013030395843137727 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 0.2622610605101529, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.040142958508706195 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6a8d6a473bf6aa3d40a195a16052062cbfd2b5d0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.11326508510637766, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015009170735602506 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.1085356303020659, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0014115882108590343 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.09793700832399474, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0011273790641300194 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.004465655727904388, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00028238368212925503 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.004056065876580251, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0002691375832689169 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.0037395950495648657, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00022671756481262202 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.09142917649433346, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011841565139209087 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.0879166314153642, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0011318568380660808 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.07862622799687088, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0008488937771314072 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.1093895229104316, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014332473740733347 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.10513740023129456, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0013659288669536467 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.09470526990675743, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0010805621439727475 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 0.2382502920735501, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02364732083952621 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ff6007cfb1e2360f9a40f795e19881c634a9c5b8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.1195188168282025, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001703506896411307 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.1202411166531759, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0016720477170315902 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.10460998387087274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012689330036383434 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.007701756834163109, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004826378231441523 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.007828279383704012, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00048735370780030713 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.006572282621218325, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00036858964749543314 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.09584725807304968, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013390205513148664 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.09711441691325796, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0013255057315948026 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.08348917343043502, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009381191407596309 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.11507270211895362, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016248601270300064 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.11590379555656585, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0015972136190990915 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.10076143309185726, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012074937805280613 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 0.590846188853053, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.08622026318742701 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..a99befc9b37709144bdb9d48309310f374ba9338 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.10561107711679257, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002135116442535178 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.10039756771582228, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018998579014479846 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.08635223714862443, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001447627888207993 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.010532137196295798, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008099525943800139 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.009664993083625497, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006422735345150598 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.00773667698720356, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00044084771240741574 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.08698250921011949, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0017942983133012848 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.08314557123023888, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0015976310654642362 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.07053215664409358, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011503059093946877 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.10076653862384102, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002033643570526417 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.09561293861925456, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001792928927851378 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.08220026138629714, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013594564836760387 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 0.5908356190981445, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04489447675590248 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ba076500db00872fbe5886ef2263214a23359d3d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.035705986442768456, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001618640602215546 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.03197561607719162, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0014717555351632275 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.027402695270076872, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001157724691879728 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.00401479182015976, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00043777035338946497 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.003920400654913368, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0004172562730537138 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.00317018932471084, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00031714568787250826 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.03013013623521446, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013844671087526951 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.026445775089489882, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0011917063204159026 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.022642473398844034, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009329018940160789 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.033971789988392904, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001538890119076818 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.03041146272732838, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0013938550134374181 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.025956946800132486, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0010846296784172967 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 0.029093100164270518, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.005798545611396172 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..adde3d7535fec674fcb7369b3b59fd179b44b4e0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_summarize_above_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_precision": 0.00595043307989505, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.000767651406351477 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_recall": 0.004778481595486118, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0006233197620300213 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge1_fmeasure": 0.00450639531355031, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.000555584129480168 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_precision": 0.0012396636176066488, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00034778633785854403 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_recall": 0.0008030145717563384, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00020935765891227752 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rouge2_fmeasure": 0.0007838553827958208, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00021051541322103913 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_precision": 0.005169809582323384, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0006759018705997662 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_recall": 0.004164169546014226, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0005467863316598697 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeL_fmeasure": 0.0039016663853958123, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.000485959181689575 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_precision": 0.005734093038513017, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0007492388147769349 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_recall": 0.004562529353283875, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00059727108953185 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "rougeLsum_fmeasure": 0.004298574460690419, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005305976692789258 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "summarize_above_en", + "bleu": 2.233297948144298e-16, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 7.375838022248224e-15 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..938137d774527b6a3892fb6beefe62b24cc96d8a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10367122573898108, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016311453272566847 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1621848426933469, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002239905291843337 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.11655773215808052, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015844131287312152 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012276074546298633, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005003323251677242 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.020862324606727274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009624161724708909 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014011218070534905, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005398251030136181 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08966835876675931, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.00130510256531558 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.14342974130461322, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019375952453362063 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.10161863320310922, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012820750167481308 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09728691738451277, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0015087459211237015 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1530554381860451, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021142656537031777 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10956185275309588, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014665536918047877 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7280055452213583, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06972461776949417 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..304669044ea0d297357633e3a71253968439eb1d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.11922048835394526, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0017010312663949007 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.12373373738142388, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018173643062502098 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10548837365464835, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012991497483475392 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.008234339790533517, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005690033754874188 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.009356412797919933, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005938642159151799 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.007230676056378635, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004084061340426134 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.09596744652257369, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001368600668118387 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.09975162242794412, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014345772942689061 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.0843032809934075, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009705899345773655 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.11440643074566167, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001619972437724354 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.11862725022797178, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0017155423340356613 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10113335490345467, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012233443711385755 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5504071981349774, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0561562373694907 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2cb0aa3e0ef5121bda5cf9e4e9ebaef32a520ac1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.13659331063329858, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0022487372544365814 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14392803155149808, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021426965551337935 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.11914801472840628, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015610543341787234 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.017190323002243903, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009306182513579381 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.01804489565450503, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000825364925597016 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014134158871786717, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000595539839287858 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.11065349886551568, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0018034426665467446 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11850505866727387, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0017735732164141282 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09639205333227598, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011890649378743271 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.12944158114309687, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0021224960396573653 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1364172032563405, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002022193400173333 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.11276308928122987, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014568432793235883 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.0951241331559471, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06468580769617795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9a9622d33c4e84707a9d138cf04239fb4fbfa781 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.1325368577188755, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0027805256927643865 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.12731530185422324, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023277957556830046 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10595502188431383, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0017328375397109063 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.021208434345761078, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0012054076568754222 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0189911073417682, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009097509424826536 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.015114213677740143, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0006401159920792145 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.1098153192916977, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0023103747538012406 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.10713066473487352, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019653832434662097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08778629502724616, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013978298599760263 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.12430578995748448, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0026077066778884906 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.11938462834076304, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002179423844395387 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09922221098825855, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001608236044311444 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.0702430413152522, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.07113210724258658 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..5f0c0e44476d52206d35150b7106e6a70eeb4308 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.05029331061394819, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002207006374572226 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.04435204949042938, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018249482984958308 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.03772701558859641, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014496259811778097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.009550867923430868, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009379034663216416 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.00752924590473579, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006210729065705739 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.006349834491065967, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000497796787181232 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.042547817990021886, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0018896153572281916 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.03761001948729707, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001533156465356555 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.031758388592257075, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012082854006158594 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.04733936020765825, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002088413158037917 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.041282972252420995, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0016916980136133567 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03520149112819018, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013434925962965214 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.1011859793046015, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.014350638137120817 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..34a2e0152ee47322b48f6d337b7957ed490a27e7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.009316160183573745, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011155532666113311 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.006701979396490877, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0007675445971480603 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.006210622496685344, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006727949372632281 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.002346103781743029, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00054243039709031 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0012899695832633785, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00031378439187027 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0011831626630347113, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0002436110293420105 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.00791303900867605, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009792864483272245 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.0056630421132579855, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0006466924999339596 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.005221351425348215, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0005750924567208641 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.008929134383929085, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010845799476391675 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.006287352900516465, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0007115097244397184 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.005877130570728575, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0006401737602435983 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 8.867413076733536e-12, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 4.796386448449034e-11 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e889eb9a409222c986fadc043a5cb2bce5eb93c0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.11295901916552518, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0021561636775618825 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.15716194752418464, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023658263657045466 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.11770828296503491, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0017536210085080694 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.015052684287067302, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005896104288986676 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.023031044736046974, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009581139045387344 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.016689036256012268, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0006284511806433984 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.09584165693939237, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001826823689459275 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.13488328000528105, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019094926140183217 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.09961737449184858, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001336830173088306 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.10435726439378455, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0019288664257451583 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.14671369444409713, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0022093912088684674 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.10939848979449732, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001611104432360681 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 0.8397297265679019, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06419556092589017 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0d7e1dd8203571e888e94633142c3ec0898da83b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.11280797879908812, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015176355293584295 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.10974032684176149, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0014531379840037003 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.09800099824939931, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0011455058233680163 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.004669460064109281, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00029342600613201146 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.0044258058822204596, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0003029421760704375 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.003943433623482069, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0002376042862947667 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.09100549212005561, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012010483184015203 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.08875940267139816, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0011563650751118756 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.07857164868748949, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0008607791775228845 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.10880498670928285, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014497893036322746 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.10606739321335039, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001402205534200808 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.09458732701792698, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0010956881578153168 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 0.25575772226089033, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03569958145217157 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a6f7b73717e8f8041ed87db6f02b49f5df3502c1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.11515560674851932, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015509282528847878 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.12300491373534446, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0017806103612246057 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.10458659667435967, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012861835501711226 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.006395365145636472, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003591958546304461 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.0077962010381853256, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005289307912376033 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.006134109611250491, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003481002830495667 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.09124703921693972, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001206351363616421 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.09741908894309344, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0013730905688191196 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.08203887538980079, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009229225262216014 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.11059022840815862, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014851063649001237 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.11805167829668524, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0016988734128864996 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.10037653634062631, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012231289195270326 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 0.48504301830637025, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05137507780432141 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f55f4a851f0fab54ac7b6ec80685035b8d96f083 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.09166326015675812, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001863618297222859 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.09439692771782093, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002049639853878195 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.07909868477561151, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014723495062479318 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.006815303592116684, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004965126210719875 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.008260177773973776, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006508910738926214 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.006138307630609741, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003968736257003603 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.07314862524882004, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014905595972838524 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.07438392591435516, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001576968342669606 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.061996926537050155, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001089486193345418 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.08729866156868725, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017644387577357547 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.08978161538723402, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0019225188795508 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.07526948626877389, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013834299708953759 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 0.4671279058356674, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.046623136526393195 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..de20b72077dfc560f82b590252cdb2bb7d4aa72e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.024122525784377926, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001268480469741408 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.02373843528685194, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0012776937483044686 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.019517882288829225, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0009604584223230579 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.002090947286237258, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003051655289255735 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.0023982591853571687, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0003799771234361843 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.00167678535423955, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000209849702180494 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.019682867450734345, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010315579522385012 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.01952315908954502, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0010418891548632763 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.015788003617005678, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0007534854041442719 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.023011530370527344, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012021135466980504 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.02274482533911906, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0012249850245862933 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.018599345217623072, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0009070972782003065 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 0.004214867695194402, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0009834876936663747 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..1358d9eda7867fe9556a2bee9eb3df6372f8fe8f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_GEM-wiki_lingua_en_write_abstract_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_precision": 0.0025439911728014682, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00044089657241181616 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_recall": 0.002127710352300543, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0003543742353112539 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge1_fmeasure": 0.0019664292916783234, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0003120896330989726 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_precision": 0.0001923386207866496, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 7.972926456097242e-05 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_recall": 0.00015798104521609337, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 6.680290787534892e-05 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rouge2_fmeasure": 0.00013934402915387212, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 5.871856589143859e-05 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_precision": 0.0020080907200489616, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0003261035212045945 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_recall": 0.0017331074442032213, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.00028920031821567146 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeL_fmeasure": 0.00156729637433548, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.00024160981216417036 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_precision": 0.0024784113984497643, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0004288948292667904 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_recall": 0.0020662127056030035, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00034094966500850676 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "rougeLsum_fmeasure": 0.001910974206326472, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0003010647724742626 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "write_abstract_en", + "bleu": 1.234466237515543e-32, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 8.15926545424835e-28 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..333dbae2fb266a53fbc1116d4c3c53d9e7c1a00f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.336, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014944140233795025 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.336, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014944140233795021 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f1bc13083bcd18672fce435f0238394cd69b7823 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.341, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014998131348402702 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.332, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014899597242811482 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee42309f180605e0782e0e41d4408ae9c502a8a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.363, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015213890444671287 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.355, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015139491543780529 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..a8576ab7bb1d483eba7f4fd62e9efd9da399cc7d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.357, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015158521721486769 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.352, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015110404505648663 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a0d4bfd1baad19f4375e916d2ba2245be444c03e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.363, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015213890444671283 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.356, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015149042659306623 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3810aa11c1e229465235309d1e379a06974f9346 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_GPT-3-style_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc": 0.351, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015100563798316402 + }, + { + "task_name": "anli_r1", + "prompt_name": "GPT-3 style", + "acc_norm": 0.339, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014976758771620347 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..eab232d7459014b63da8c3adaa05d6ae112f1f76 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.334, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014922019523732954 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.01487687202745673 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..286a368fd2faf6d421cfe51e092a734166652734 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014910846164229863 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014910846164229863 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..824bbf9689148c0ec1ccf969f558d26600e3763b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.359, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.01517726422479859 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.357, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015158521721486764 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..978dacf412236f4f8b32d83a4f09ca4c549c9dca --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.349, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015080663991563098 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.341, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.01499813134840271 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f6e0f3cc290fb1ad36622b6d64c75a4c4a8cc467 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.342, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.01500870618212173 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.339, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014976758771620335 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d834a864c5a7fb3597d37775aa052adaeb2f2de9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_MNLI-crowdsource_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc": 0.34, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014987482264363937 + }, + { + "task_name": "anli_r1", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.332, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014899597242811483 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8e82e2e5a9a8bbb4a9579f0cc97448965efe7be4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.34, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014987482264363937 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.329, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.01486539538592836 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..59c35cb4ee4774f16d90babd63461dd7822894ba --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014876872027456734 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.343, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015019206922356953 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d7dab0ac7d93e307a6c645caa72b57c38a91ef66 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.351, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015100563798316405 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.35, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015090650341444236 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..28dd309218cd14c284af2383d95f39f1a573f40e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.363, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015213890444671285 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.354, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015129868238451772 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..140c82a12ae2e9c039d098ef1d883bf69a5d770e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.363, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.01521389044467128 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.351, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015100563798316403 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..579c87393bffdae42183f67984740316db238b4a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_can-we-infer_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc": 0.355, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015139491543780529 + }, + { + "task_name": "anli_r1", + "prompt_name": "can we infer", + "acc_norm": 0.351, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015100563798316402 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a9497729baa4de0217ab211833be79c9d7c2d08e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.331, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.01488827258820394 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.322, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014782913600996673 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4a78dc4274c031fc103962df14c7def3ea002522 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014910846164229863 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014910846164229863 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..0990cb70d65b8799a4918a700bc23a840c96ab44 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.347, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.01506047203170662 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.348, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.01507060460376841 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f1047edc18bb8032dbe059b80318a2d6cf809d65 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.347, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015060472031706618 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.35, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015090650341444236 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..023ee96ce0be59114d834022662d22a980d96693 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.344, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015029633724408948 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.343, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015019206922356951 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..dc0e68f51e83b021e86c0811f1122ba8d15fb0eb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_guaranteed-possible-impossible_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014933117490932575 + }, + { + "task_name": "anli_r1", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.336, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014944140233795023 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..def4e4bac21fee1fc3fa5ad3bdebb61abf86898d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.341, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014998131348402713 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.326, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014830507204541035 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..3c976bfc2d65bdf1c52dab3eba9c3dda5491191e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.332, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.014899597242811483 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.337, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.014955087918653603 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c80ac9baac5aa21dbc6501e8a718b47570614bb2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.357, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015158521721486767 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.361, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.01519572011817511 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e158e84653b5bb57906a30eff01745e2e1bd13a6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.351, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015100563798316402 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.363, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015213890444671285 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6633dbb04b92d3b23df1304c7a7b35260f3670e2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.362, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015204840912919496 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.352, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015110404505648663 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e25e8565fd5f443d8251ac4c3e93d03e611e2802 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r1_justified-in-saying_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc": 0.35, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_stderr": 0.015090650341444233 + }, + { + "task_name": "anli_r1", + "prompt_name": "justified in saying", + "acc_norm": 0.349, + "dataset_path": "anli", + "dataset_name": null, + "subset": 1, + "acc_norm_stderr": 0.015080663991563104 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2685d73a55543cebc2a42b9ed2c75fa72cd6a223 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.342, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.015008706182121734 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.354, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.015129868238451775 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f296c5c4e1acde8d8f707704bffca9f85cfbf587 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.318, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014734079309311901 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.317, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014721675438880217 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f964d2cb6133c3393c694e9abee05e08d74c43a2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.322, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014782913600996683 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014770821817934649 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..82e026f4f7ba417abc2f080977033cadfcf9c7df --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.329, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014865395385928373 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014818724459095524 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fb8a261e76f8acc3a9629647009f8fffe2cc7469 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014910846164229875 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.32, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014758652303574881 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..16ce13e4e8b416aa97e554a9bd3447b4ed343143 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_GPT-3-style_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc": 0.34, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014987482264363935 + }, + { + "task_name": "anli_r2", + "prompt_name": "GPT-3 style", + "acc_norm": 0.338, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014965960710224472 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..15ff4abeb16dd35312cac4f96a8eb01e5a41589e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.334, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014922019523732958 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014933117490932577 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..02a73da9d3fb0f79773b8cdbb8a43293b60d8e53 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.315, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014696631960792506 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.315, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014696631960792506 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..925618ad886a90da54bfa563186461994ea04bd7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.01477082181793465 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.338, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.01496596071022447 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..0e0bc85584a7f545f3208a42fdd06d2b9384b32b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.309, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014619600977206491 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.311, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014645596385722694 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..75efc164df36dda096fb2c713c1e87a0186d76a7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.291, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014370995982377939 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.3, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014498627873361425 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..40cebcaa9d0921c46b6d286fd5d424301f080d1e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_MNLI-crowdsource_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014818724459095524 + }, + { + "task_name": "anli_r2", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014770821817934645 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..27df43dafe882617260f237d7b9a6e7a0bf30214 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.336, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.01494414023379502 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.345, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.015039986742055242 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d21195e000d219eda820d274e8a0342b7b3635a8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.331, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014888272588203928 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.01487687202745673 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..cd7d59664483aafe0f379c9e26981635fc6c39d1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.327, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014842213153411237 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.341, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014998131348402718 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..57f41c2815b50f00cef18589fd0964c701637537 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.332, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014899597242811492 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.326, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014830507204541031 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3c30998b0726f1565a5ba89076758e31b283f928 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014770821817934652 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.331, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.01488827258820393 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9ed757677abd480d2d252e37b29b9be3e9e1e51f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_can-we-infer_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc": 0.331, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014888272588203924 + }, + { + "task_name": "anli_r2", + "prompt_name": "can we infer", + "acc_norm": 0.338, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.01496596071022447 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3020cf0152b4e702b78c8808dd30fdb8cb8608f6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.329, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014865395385928362 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.336, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014944140233795025 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..91a1fda11b18e80bec658fbf4a52c4851c332134 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.315, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014696631960792506 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.315, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014696631960792506 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..1d4d75778c27dc9da5f4281f27c0f6357588b523 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.326, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.01483050720454103 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.326, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014830507204541031 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9c2228dbccc63541e66d699540d8fc1eb7462a30 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014770821817934649 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.321, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014770821817934649 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..291e069de7b750fd8cf4d78ef0425ae966b37672 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.313, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014671272822977883 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.314, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014683991951087973 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..025ef467b3825d5318a4ff7b2060c412d4003df9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_guaranteed-possible-impossible_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.314, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014683991951087973 + }, + { + "task_name": "anli_r2", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.312, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014658474370509012 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3d3d8c750df6c485ed73a37eeac4002d88affd52 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014933117490932577 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.353, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.01512017260548369 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ad348ea23ad3ec091e8abbb2ba33be3a37cb153a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.322, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014782913600996674 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.326, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014830507204541033 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..44c348f9b5573bdb9d26422773af68d1889ee828 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.331, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014888272588203924 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014910846164229876 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7b07303ac9684ccd0097dbd86c029f831da46b85 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014818724459095526 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.322, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014782913600996683 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..c291ed1cca09fbb6a9218c1a2c26efc4a0bc5064 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.314, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014683991951087974 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.32, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014758652303574888 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..dcfe32b2c26ad5e78d302ea38ffa823e7410d1ff --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r2_justified-in-saying_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc": 0.329, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_stderr": 0.014865395385928373 + }, + { + "task_name": "anli_r2", + "prompt_name": "justified in saying", + "acc_norm": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 2, + "acc_norm_stderr": 0.014818724459095524 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..31acca9a4229a69d2aa8d6cc615f63c6f4ecae53 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.32916666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013570806258433625 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013526454480351028 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..087544002c3398a124324927e261ca68bee8c78f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.35, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013774667009018558 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.3375, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013655897185463652 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..adce017f66795a5638af7327d249fce62a436e34 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.33666666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013647602942406387 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.3325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013605417345710528 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..382bb2d8953a45c8e6252a8b9f5f15922b9e0946 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.3516666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013789711695404785 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.3566666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013833742805050722 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4af9f50c0ee09ded17b0d78b217aa9053e1f71ac --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.3466666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013744022550571946 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.3283333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013562032919529019 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..589406c1457c72deb8dfecbe837d66ed58fa73dc --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_GPT-3-style_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc": 0.3325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013605417345710526 + }, + { + "task_name": "anli_r3", + "prompt_name": "GPT-3 style", + "acc_norm": 0.33916666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.01367234349168182 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e21213d242ddd8586f15cde6b1e2a898d5b102fb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013630871843821476 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.3433333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.01371263383046586 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5ce127d3641cf804cc57e61808152105dc2b4785 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.33666666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013647602942406393 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.33666666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013647602942406393 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e399d93c96f5439cdf3923cf762fb49807aad83f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.32666666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013544340907003665 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.32916666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013570806258433633 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..35f56a00f4b1282cc71786e1dee64c4951341b5c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.33916666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013672343491681808 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.325, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013526454480351018 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbb73f5edf41f6fb1fbc4163d3eb7a8d958205a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.3225, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013499258621103249 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.32, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013471620929769139 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ab61b04c440a1759467f6474f0504e9cba75ae --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_MNLI-crowdsource_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc": 0.3258333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013535422043417464 + }, + { + "task_name": "anli_r3", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.31333333333333335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013395739415639082 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..922c72d88021dd4fd3a5ebfd9fb5b33a8744c11b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.3475, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013751753243291852 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.3408333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.01368860079329693 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..3622ecdea0b3c98e82a592fd99adf18d6b98eb49 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.3525, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.01379716491891836 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.34, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013680495725767797 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..01511451b6b43993a69d5892a313993ba7aa1993 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013630871843821469 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013579531277800917 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..53bb245f9c04c6aabc834f32724a40922c9a05e1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.345, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013728421539454876 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.3375, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013655897185463664 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..99f3b51486822e053c1e060e51fec72697a7e813 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.32666666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013544340907003663 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.31, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013356596331200263 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..634fb322b12c42b9f16ba0dc5c63606550b1adcd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_can-we-infer_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc": 0.3175, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013443538681348052 + }, + { + "task_name": "anli_r3", + "prompt_name": "can we infer", + "acc_norm": 0.32666666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013544340907003663 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..40db948a1299349f8f093b8159c67df68f85e022 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.33166666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013596836729485166 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.31416666666666665, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.0134053993149841 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..cf199d29edfd0d72dbc07d176b0f0c882d802062 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.33666666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013647602942406393 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.33666666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013647602942406393 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..0afc4ca80d9469d2b3e003a60a39461f7fc2fe7b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.32166666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013490095282989521 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.32, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013471620929769142 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4033016769e8bbd7fce900562fdc65a87240ffa4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.3416666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013696658778002515 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.3425, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013704669762934728 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e505f94bf89bac9785967c8eeaa23d636103bc8b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.33416666666666667, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.01362243481313678 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.3308333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013588208070708993 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..bed7ee887a0d723481c43e84cddc7e8d0ee344a3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_guaranteed-possible-impossible_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.31166666666666665, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013376268790982096 + }, + { + "task_name": "anli_r3", + "prompt_name": "guaranteed/possible/impossible", + "acc_norm": 0.31083333333333335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.01336645784596543 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_0.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ac26ec926b892903140227f71a47c5d59a96e444 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.3475, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013751753243291852 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.3408333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013688600793296934 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_1.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..157e8d8b392f32a1c3cb04b47434ae9b55a978df --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.3383333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013664144006618268 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013579531277800922 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_2.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..280eb2e99f035eba4973eb8f6b6f76f7e90537ed --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.3258333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.01353542204341746 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.33, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013579531277800917 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_3.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b36c0c112d80db1e930398ce77bccb7990b6f732 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.01363087184382147 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.32916666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013570806258433625 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_4.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..99de6fa19a15ea19d892143ea1e1d6034ae74f3e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.3258333333333333, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013535422043417462 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.3175, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013443538681348052 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_5.json b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..497be9368a197a9872732bdc6a5213b08ba1efb6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_anli_r3_justified-in-saying_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc": 0.31333333333333335, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_stderr": 0.013395739415639082 + }, + { + "task_name": "anli_r3", + "prompt_name": "justified in saying", + "acc_norm": 0.32166666666666666, + "dataset_path": "anli", + "dataset_name": null, + "subset": 3, + "acc_norm_stderr": 0.013490095282989521 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f9fc1cde104b004d2327cc4c207fd95537911dd4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.22013651877133106, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.01210812488346098 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.22013651877133106, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01210812488346098 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d2f542b7daf325515ad76e55ac78ab9793a69eaa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2363481228668942, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.01241496052430183 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2363481228668942, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01241496052430183 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_2.json new file mode 100644 index 0000000000000000000000000000000000000000..89199756b82669c5622f76f10deeaea5e554b83c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2354948805460751, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012399451855004748 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2354948805460751, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012399451855004748 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_3.json new file mode 100644 index 0000000000000000000000000000000000000000..48ac9d2c7cbd625cedfdbfa43abab073c6349ec7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2508532423208191, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012668198621315433 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2508532423208191, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012668198621315433 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_4.json new file mode 100644 index 0000000000000000000000000000000000000000..285d57a6b08e6f9e735e0efa95fc8e98b39eaf7b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2593856655290102, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012808273573927094 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2593856655290102, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012808273573927094 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b29c534fbb12a870aa30f2ef40798773f4dcd09b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_heres_a_problem_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.24488054607508533, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012566273985131358 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.24488054607508533, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012566273985131358 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d8360f90e9e64e3115ed5e0bc93327548e076b3b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2098976109215017, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011900548748047446 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2619453924914676, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012849054826858117 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2c957888b90b8f05d5d52f98d473319c0e133d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2150170648464164, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012005717634133611 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2593856655290102, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012808273573927095 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..bb7032bc69b69568efb65e2193ad9f5ba83f0de2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2150170648464164, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012005717634133608 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2645051194539249, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012889272949313364 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a210457e2d0fe7add15f0cbd96c5ea98748651 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2150170648464164, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012005717634133608 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2645051194539249, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012889272949313364 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7c35384afcb353f5ddf21a0958d7b30b7ee85ecb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.22525597269624573, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.0122078399954073 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.26791808873720135, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012942030195136425 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c98602ab735e65bb2113961439f0b7dcb7d1e699 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_i_am_hesitating_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.23293515358361774, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012352507042617405 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.257679180887372, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012780770562768414 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..551f5dfdfbe48540815babd98e43411aae045999 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2235494880546075, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012174896631202612 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.27303754266211605, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.013019332762635734 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d3dbc376b1d2f96775f85a9407adf96484ba8321 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.22098976109215018, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012124929206818258 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.25597269624573377, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012753013241244513 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..cddc3898bf17a29fa7c9f8545cf77178fc34dccf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.20733788395904437, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011846905782971385 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.24914675767918087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012639407111926432 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9a03406dc4b2cb4c0f4596d7604cf4cb6598693a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.22440273037542663, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012191404938603838 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.25, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012653835621466646 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..73420c7747cfcec2e9526931d072589b807ec599 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.22525597269624573, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012207839995407307 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.25, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012653835621466646 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0001b08372901c7e5cb9ca4f1077f78aa047aade --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_multiple_choice_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2030716723549488, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011755899303705582 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.24658703071672355, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01259572626879013 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..5660f0c7b6542d3fe2c7ea7432c018f8aa79d924 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.22013651877133106, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012108124883460976 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.22013651877133106, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012108124883460976 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6de2d60a8794e7c213ddc894c778ab9b5a9f62ab --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2354948805460751, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012399451855004746 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2354948805460751, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012399451855004746 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7c6618ce6c211845fd6a902060a283485ef8c912 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.23122866894197952, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012320858834772273 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.23122866894197952, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012320858834772273 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..75fe2efbc316585f76ea057ac281c6b0e27cbacc --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2380546075085324, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012445770028026206 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2380546075085324, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012445770028026206 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7f18e8e7082d3ca5eee2450883c33b03c931020f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.24914675767918087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012639407111926435 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.24914675767918087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012639407111926435 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..2f3478636001dc997b7bee6dd59f02e1bfdc283e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_pick_the_most_correct_option_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2440273037542662, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012551447627856255 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2440273037542662, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012551447627856255 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d4ea4d007a3876c272119053e248eb2454d11b6b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.20477815699658702, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.0117925443385134 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2619453924914676, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012849054826858117 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..982cb60d8b7569332d0c1961ff397c25ddadb88c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.2098976109215017, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011900548748047452 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2525597269624573, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01269672898020771 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e0e3087a1494df623d139e326be350095458aafa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.21160409556313994, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011935916358632863 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2636518771331058, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012875929151297056 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e8bd5817799ac086b81c1991d5cfd3003ad42da9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.2090443686006826, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.011882746987406448 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.26706484641638223, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01292893319649635 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fcb447ab542abe0c36b64e151c0dc3016d7b89ea --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.22866894197952217, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012272853582540807 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.27047781569965873, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.012980954547659556 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..75ff7c2f6b531d7ef46d8d6d576e00cea111969f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_challenge_qa_options_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.22098976109215018, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_stderr": 0.012124929206818258 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2627986348122867, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Challenge", + "subset": null, + "acc_norm_stderr": 0.01286252317535133 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1db03b6da44bb7b59a4e4b51c2ffa1fc77cb1a27 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.25673400673400676, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008963590834042407 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.25673400673400676, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008963590834042407 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_1.json new file mode 100644 index 0000000000000000000000000000000000000000..36d1cb6456baae4f2704451124ca9bd60f3383f8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2478956228956229, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008860162361464028 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2478956228956229, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008860162361464028 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ec7deb30e63d9da50f68c1391d2e2b395b0460c1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.26136363636363635, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.00901583836660821 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.26136363636363635, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.00901583836660821 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b5119edbd855823e57373b1ade16d4ae6b6e11bd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2588383838383838, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008987501845758047 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2588383838383838, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008987501845758047 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e41c23cea7e924fa55b38389d5353201c0d0685b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.25084175084175087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008895183010487386 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.25084175084175087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008895183010487386 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_5.json new file mode 100644 index 0000000000000000000000000000000000000000..901f8df6f504e544c1aa0d30a568f699a31088b3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_heres_a_problem_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc": 0.2622053872053872, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009025197991724831 + }, + { + "task_name": "arc_easy", + "prompt_name": "heres_a_problem", + "acc_norm": 0.2622053872053872, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009025197991724831 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..70b8a6aac73474982bf844a793df39b0a2e39d04 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.3021885521885522, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009422719042483188 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2866161616161616, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009278551100969298 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..fb69bebe433dcdaadf444fc81d665ff5f5184db5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2828282828282828, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009241472775328228 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2840909090909091, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009253921261885763 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b419cba9906cc06035f1486a3132d187e7aebc78 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.2824074074074074, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009237303403479327 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2765151515151515, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.00917788010146828 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..13d72763eace307e0f039393b458807d5aa38bc8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.27735690235690236, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009186490105111902 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.27735690235690236, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009186490105111899 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b9a1ea14a46894820a6669ff81effc34a3b74155 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.27441077441077444, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009156177122244532 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.2777777777777778, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009190779909649918 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5e5ee3d251c21e5fc554bbe2c5ef105bb0fa4cce --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_i_am_hesitating_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc": 0.27441077441077444, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009156177122244532 + }, + { + "task_name": "arc_easy", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.27525252525252525, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009164888895174743 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a61de562e084cb43ef0f8b9b8dae112dbda3ec06 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2895622895622896, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009306838912173902 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.2706228956228956, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009116466166403821 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..538da088e00d6a5a63e256ffed8078905b408a1d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2781986531986532, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009195059601583901 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.27525252525252525, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009164888895174743 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ae13aff9e95e27517e0a04f99c0efa4df6e21e1b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2760942760942761, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009173559873835262 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.2718855218855219, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009129795867310489 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..93b5a1ccce4d1c9730eb2d9f912de0c6dbad5f5c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2697811447811448, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009107527914671064 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.2748316498316498, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009160538115254952 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..33a8c23e53641b22a924b249b8d33946f8f98129 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.26262626262626265, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.00902986177676375 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.26052188552188554, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009006435890336588 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..02c7b232a73ada0412ba667a965f7a180ae9f976 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_multiple_choice_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc": 0.2588383838383838, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.00898750184575805 + }, + { + "task_name": "arc_easy", + "prompt_name": "multiple_choice", + "acc_norm": 0.2563131313131313, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008958775997918368 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..9d71a75d5bbfaabc01d128aea822c577037ffbb5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2478956228956229, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008860162361464027 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2478956228956229, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008860162361464027 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..67f302f95a6a835af05f4eb2cb4737e7a5e208b2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.25, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008885233166386385 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.25, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008885233166386385 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8992477f4a00454d42e10690b3fd149d25d3c21a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.26641414141414144, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009071357971078681 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.26641414141414144, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009071357971078681 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..75261ae7dfdd1c78ff6bd9cf8c516ac451fa0d3f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2588383838383838, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008987501845758049 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2588383838383838, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008987501845758049 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..cfb1ea1d41cfb4c495abcd784a5a230430e0e1fb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.25084175084175087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008895183010487386 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.25084175084175087, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008895183010487386 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e9edf2665e93a9bc86479d5f01924036c55a87c8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_pick_the_most_correct_option_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc": 0.2537878787878788, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.008929657065808293 + }, + { + "task_name": "arc_easy", + "prompt_name": "pick_the_most_correct_option", + "acc_norm": 0.2537878787878788, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.008929657065808293 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_0.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..103d26eaff0b78d2f7db329a175269ec127055f5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.30092592592592593, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009411516193787188 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2845117845117845, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009258050925618823 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_1.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..06cdcc8eaa00f677b48ffedfbb704ce9471657fa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.29419191919191917, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009350328648861737 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.28324915824915825, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009245632200075453 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_2.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f440aea464013a091408b6f39d77567d42f67f9e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.289983164983165, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009310840970769035 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2777777777777778, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009190779909649923 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_3.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..678fcb24a5a1de626a11fda118e59a20bb395b65 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.28619528619528617, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009274470774627732 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2798821548821549, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009212077524656529 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_4.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6ee169865ccafe2ed9ae6e17978b11113fe71392 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.2828282828282828, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009241472775328228 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2777777777777778, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.009190779909649916 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_5.json b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..2302d30d5e5177019abbf7c9143fa82682001d79 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_arc_easy_qa_options_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc": 0.2760942760942761, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_stderr": 0.009173559873835264 + }, + { + "task_name": "arc_easy", + "prompt_name": "qa_options", + "acc_norm": 0.2786195286195286, + "dataset_path": "ai2_arc", + "dataset_name": "ARC-Easy", + "subset": null, + "acc_norm_stderr": 0.00919932919502635 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..761c9835f6a9b46328dd1c847a9ed412af3d898c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.5703333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009039449353930876 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.6243333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008843442555522142 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8832831cc62409df77571a883e3cb0aee03972bb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.536, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009106534814375936 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.576, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009024142344197916 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..42ed9a468cdb68cf3cb1c2c13aeeaefa8b8483e3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.5546666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009075496684215473 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.5843333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.00899942365775079 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..53fb27270a862f54f2e41149276784eff357c2f0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.5446666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009093726495969151 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.5836666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009001498317147612 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..bd92b7f649ef6f8842a9ef45d17c06beb0a84cf0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.551, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009082611478924378 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.58, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009012606487132153 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e31f8361a8608e1d9b5bd6a60c94864227a6b61d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_GPT-3-Style_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc": 0.5473333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009089227499483243 + }, + { + "task_name": "boolq", + "prompt_name": "GPT-3 Style", + "acc_norm": 0.5796666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009013590979636827 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_0.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_0.json new file mode 100644 index 0000000000000000000000000000000000000000..670061f822431760e9c9b2bbd88e66c907ce13eb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.6236666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.008846558976258922 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.5433333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009095877403306734 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_1.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ca2593d7d89203f6afd13e68b4b9413baacd5b52 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.575, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.00902693165837962 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.5653333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009051951785603833 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_2.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_2.json new file mode 100644 index 0000000000000000000000000000000000000000..53beda873e9cc8ba4b4d4b3f5d918e8b4c716379 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.593, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.00897090625594853 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.5863333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008993097934268066 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_3.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d0318176b500506caffea37c0f83e4c722eef153 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.5793333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.00901457125468042 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.571, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.00903771136639389 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_4.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_4.json new file mode 100644 index 0000000000000000000000000000000000000000..98472bc0779c99ef455acf336f06bd2b1aff57aa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.5696666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009041170615977855 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.5556666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009073469569248527 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_5.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_5.json new file mode 100644 index 0000000000000000000000000000000000000000..08fecf2559c4c04a59d2f1d6b7a0a96b6348ab18 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_after_reading_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc": 0.5663333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009049526374650807 + }, + { + "task_name": "boolq", + "prompt_name": "after_reading", + "acc_norm": 0.5553333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009074149407514857 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_0.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_0.json new file mode 100644 index 0000000000000000000000000000000000000000..4662aca7a89770508c8f042d197b562dc4d38d50 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.6233333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.00884811049411477 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.38233333333333336, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008873806022763182 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_1.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e763d2d16ffcd0102dc4a6590762ffe910e118af --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.5423333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009097447488896774 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.541, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009099483512819305 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_2.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_2.json new file mode 100644 index 0000000000000000000000000000000000000000..25f315d6f27412d0e758dbc13a55889b95f4f9b2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.5223333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009121118663627244 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.5156666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009125748094153247 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_3.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9dbe62af02e24246eb6298031c4473e8fa8f5ca5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.5453333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009092626403553739 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.5353333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009107405418833937 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_4.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_4.json new file mode 100644 index 0000000000000000000000000000000000000000..9681a3c4bb17a85ef74a3327c7748f83a6200d02 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.5423333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009097447488896775 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.5323333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009111120825274597 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_5.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_5.json new file mode 100644 index 0000000000000000000000000000000000000000..eba32bcd63a5ca310745e5e63e7e1c6317576b3d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_exercise_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc": 0.543, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009096404868252823 + }, + { + "task_name": "boolq", + "prompt_name": "exercise", + "acc_norm": 0.5323333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009111120825274599 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_0.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..34453f265235d42856c5374eb2f2660d249bdff8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.61, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.008906539291295217 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.4523333333333333, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009088646624339617 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_1.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..dc9774840b944189ed286bf83072d0efdc31b8ac --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.5586666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009067164858263418 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.5546666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009075496684215473 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_2.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5c38e15e4eddc3868d178679febb470fbc37a973 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.5856666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.008995223478188036 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.5746666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009027853030468718 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_3.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..78a0a1e08ae717f0fc75a0a6309b2da1d04b94c0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.5883333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.008986619341172333 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.573, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009032396953831094 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_4.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7bb9fdacfc1bad109d29e60bb5475c813305f740 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.581, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009009627690997004 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.5746666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009027853030468722 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_5.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6379cc2b73aae2125b82fb53ebf192fdf37b3b2a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_valid_binary_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc": 0.582, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009006610887558775 + }, + { + "task_name": "boolq", + "prompt_name": "valid_binary", + "acc_norm": 0.575, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.00902693165837962 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_0.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_0.json new file mode 100644 index 0000000000000000000000000000000000000000..cdffa544bb396aee628684bf48ecd76d622b8b5d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.624, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.008845002997512754 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.6236666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008846558976258922 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_1.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d419f7b881b25556a194af216b85078952425c0b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.5476666666666666, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009088646624339617 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.57, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.009040312075041282 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_2.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d4552f4cd66345d6c2f914f3c11df25333a89895 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.579, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009015547313638383 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.5933333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008969751860881005 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_3.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_3.json new file mode 100644 index 0000000000000000000000000000000000000000..fb73e076e64cbfeca63185ff01a0bedf05736aa8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.5733333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009031496556538196 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.597, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008956770693005616 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_4.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_4.json new file mode 100644 index 0000000000000000000000000000000000000000..bc1651c88fa0db2685713123a4ec7267b277f4ca --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.575, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009026931658379624 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.5986666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008950698369218387 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_5.json b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a26d61381184b823fc1c87692cc0725974cfacc6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_boolq_yes_no_question_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc": 0.5706666666666667, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_stderr": 0.009038582451449428 + }, + { + "task_name": "boolq", + "prompt_name": "yes_no_question", + "acc_norm": 0.5973333333333334, + "dataset_path": "super_glue", + "dataset_name": "boolq", + "subset": null, + "acc_norm_stderr": 0.008955564831687461 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..55d5c8ffc952b29039b5c6c4f6c83858b7691006 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_0.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.4107142857142857, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06633634150359538 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.1940928270042194, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..37be14504a4d13f417f561c1086cb6ea7be7a3a0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_1.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.27686156921539234, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..bec6e5bf5f89aab3733d609b668b3115e3d8f8d4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_2.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.44642857142857145, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06703189227942398 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.2956393200295639, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..aa09636fca0e68b1188c20c8855d218ddc796e67 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_3.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.2596899224806202, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..9fde5ab12e4d06e296a8ffd62b8209ce00aaf0ac --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_4.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.375, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06527912098338669 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.23582089552238802, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..61d292cee436211e02a248406f4fe2a1958b4598 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_GPT-3-style_5.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "GPT-3 style", + "f1": 0.235006435006435, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_0.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..5f6dd6b454f16baf4ccb26d3a0e9d51fa7ab3f02 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_0.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.4107142857142857, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06633634150359538 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.1940928270042194, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_1.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..308f1c250a5f31909053479f77e19bfee299f90d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_1.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.2842025699168556, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_2.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6e0093df8c98ae5d58509df0401f1fc3e1304dc4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_2.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.4107142857142857, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06633634150359541 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.28245294474802674, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_3.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f4c65216da8aa20c89de7927abb4d1d58ed7a2b5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_3.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06585388898066351 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.27449576321756775, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_4.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fbf75ddad3fe2f1f5cab9801f0f891a30e64dfcc --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_4.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.4107142857142857, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06633634150359541 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.28319209039548027, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_5.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..76ec1ad1c0e4a899e19146dcf0384eaed7a2542e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_MNLI-crowdsource_5.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "acc": 0.35714285714285715, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06460957383809218 + }, + { + "task_name": "cb", + "prompt_name": "MNLI crowdsource", + "f1": 0.24859943977591037, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_0.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f0c381b494642e5ae3303c276ffc83278701bb2b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_0.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.5714285714285714, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06672848092813058 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.3983682983682984, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_1.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7d03e83c618fc76378bfc3e20c0c31e266647768 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_1.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.42857142857142855, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06672848092813058 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.29449152542372886, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_2.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b4778b6558669200180ac9d0c3cc3d64d2c114af --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_2.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06741998624632421 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.34717335209946043, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_3.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_3.json new file mode 100644 index 0000000000000000000000000000000000000000..00688b3e01330c4d37d572493515767024bc0307 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_3.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06741998624632421 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.34491725768321513, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_4.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_4.json new file mode 100644 index 0000000000000000000000000000000000000000..9421ccca99a15c55eb1181d8b5eb0e9daf7c1efb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_4.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06741998624632421 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.3485060690943043, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_5.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_5.json new file mode 100644 index 0000000000000000000000000000000000000000..71d4249e34283b58c3936cfa4e8906e2d9dfe0aa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_can-we-infer_5.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "can we infer", + "acc": 0.44642857142857145, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.067031892279424 + }, + { + "task_name": "cb", + "prompt_name": "can we infer", + "f1": 0.3013431013431014, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json new file mode 100644 index 0000000000000000000000000000000000000000..10ce8b97d1f994169fe7d606cd8df3a1bf52e006 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_0.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.14285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0471841613625583 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.11815661815661815, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d8fec2d622ac293b85dbc46e8356ba0ec3eaad27 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_1.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.2842025699168556, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f69b1e61831cb72d5b0ecbd34ff6d2b6eb623680 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_2.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.375, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06527912098338669 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.2567567567567568, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac162deefda2896638a73c7a2932f1edea1699 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_3.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.35714285714285715, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06460957383809221 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.22072072072072071, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d3fe80ab1376ff8208de9fcb7bed113fcabf65d5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_4.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.39285714285714285, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.24598511555033295, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json new file mode 100644 index 0000000000000000000000000000000000000000..307362cb7d451b195c5b1bcb909aa12091c014d0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_guaranteed-possible-impossible_5.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "acc": 0.375, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06527912098338669 + }, + { + "task_name": "cb", + "prompt_name": "guaranteed/possible/impossible", + "f1": 0.25396825396825395, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_0.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_0.json new file mode 100644 index 0000000000000000000000000000000000000000..65882da2b4b30a0baf5298a69542e383549db9f6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_0.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.6071428571428571, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0658538889806635 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.4233100233100233, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_1.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0ac26eb7af78343b232eb0283c81e3cf9c3254bd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_1.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.44642857142857145, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06703189227942398 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.31149301825993553, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_2.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_2.json new file mode 100644 index 0000000000000000000000000000000000000000..79c64b37e4af28c20b5a5a83d52af708ad12bb65 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_2.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.48214285714285715, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.0673769750864465 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.33543859649122804, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_3.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e442281a39663dd19ba8362252ad47e45342ff06 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_3.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.44642857142857145, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.067031892279424 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.3083804143126177, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_4.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fe017358047521c0d894e28c4c0a59a6a716f02f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_4.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.06741998624632421 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.34717335209946043, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_5.json b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_5.json new file mode 100644 index 0000000000000000000000000000000000000000..349f461ff637509439e025b18a5cf41e480a417a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_cb_justified-in-saying_5.json @@ -0,0 +1,33 @@ +{ + "results": [ + { + "task_name": "cb", + "prompt_name": "justified in saying", + "acc": 0.44642857142857145, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null, + "acc_stderr": 0.067031892279424 + }, + { + "task_name": "cb", + "prompt_name": "justified in saying", + "f1": 0.295771144278607, + "dataset_path": "super_glue", + "dataset_name": "cb", + "subset": null + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_0.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2b6fa50fe8cd4695e5c076f354822690401ac081 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.58, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049604496374885836 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_1.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8be6eacc56e691897d574d5f06cd388c14b93bf3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.57, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049756985195624284 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.54, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05009082659620332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_2.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_2.json new file mode 100644 index 0000000000000000000000000000000000000000..edc7ffdce2b7617e2e090354a9c5e1e30a024082 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.56, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.04988876515698589 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.51, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_3.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_3.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c3e78c1089c6d6f943593d9744be45ce1b25c3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.63, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.04852365870939099 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.54, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05009082659620332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_4.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_4.json new file mode 100644 index 0000000000000000000000000000000000000000..678225dc947081117252ba7b8235533c67787ba5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.59, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049431107042371025 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050161355804659205 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_5.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ceb271b3fbba5ce6a590469f968c82cc50a2572f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_best_option_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "best_option", + "acc": 0.58, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049604496374885836 + }, + { + "task_name": "copa", + "prompt_name": "best_option", + "acc_norm": 0.56, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.04988876515698589 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_0.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_0.json new file mode 100644 index 0000000000000000000000000000000000000000..9788921695f48813bc9d783670d399924d730f83 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.57, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049756985195624284 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_1.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ac699aa684f9ddde92dddcf6e57aa912d7357500 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.57, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.04975698519562428 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.51, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_2.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_2.json new file mode 100644 index 0000000000000000000000000000000000000000..81fa67506a1a04ac6c1c8ea363956d1b1ffdd52b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05016135580465919 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_3.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3689d13ff349238c6db20aaccca0f907becf889c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05016135580465919 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.51, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956911 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_4.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d22b2788db6cd658b20a0e00f06b28512b5f9569 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.52, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_5.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_5.json new file mode 100644 index 0000000000000000000000000000000000000000..4602ef1b3114979159d1969b9cac8ac8195b2ea2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_cause_effect_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc": 0.51, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05024183937956911 + }, + { + "task_name": "copa", + "prompt_name": "cause_effect", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_0.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c9c2e7177fbebdf3d0cbf685abfab36c2ba524fd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.6, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.04923659639173309 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.47, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_1.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_1.json new file mode 100644 index 0000000000000000000000000000000000000000..10172daf119eecae7ace02c3f5a238b7ac8d7fb8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.54, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05009082659620333 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956911 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_2.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d8f06f6ba833a1106e4acb4f2b3f497643bc7f55 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05016135580465919 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.51, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956911 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_3.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b8d90fddd0c4d19313a7b84bb6608477649b319e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_4.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_4.json new file mode 100644 index 0000000000000000000000000000000000000000..074c61eece34d59993fdba06b230ce7172b6bfdf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050251890762960605 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.45, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.04999999999999999 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_5.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_5.json new file mode 100644 index 0000000000000000000000000000000000000000..1857e45bb1093788806394ca462d8beee05a07ff --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_choose_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "choose", + "acc": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "choose", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_0.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_0.json new file mode 100644 index 0000000000000000000000000000000000000000..24e276cfe9ad239ee433b8d5dbf16abf2e1c396e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.61, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049020713000019756 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.54, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05009082659620332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_1.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_1.json new file mode 100644 index 0000000000000000000000000000000000000000..eec79a2ddf26448400ca6f24e48c56a4b83890f6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.55, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.5, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050251890762960605 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_2.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7dbf4615be3a03c76005b1f084a2adcc9ef7e97b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.52, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_3.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_3.json new file mode 100644 index 0000000000000000000000000000000000000000..c6f3fcc7473358dade0373486541afd9fededd8f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.52, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.46, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05009082659620332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_4.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_4.json new file mode 100644 index 0000000000000000000000000000000000000000..42899210b43d9ab798e484531359c0ce1971a1f8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.5, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050251890762960605 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.47, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_5.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0401e59368b85c6cd9bf4c45bdd8f18520f6f8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_i_am_hesitating_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05024183937956912 + }, + { + "task_name": "copa", + "prompt_name": "i_am_hesitating", + "acc_norm": 0.47, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_0.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0b906878c8e7d152e5295bab73a2c5b9435f7a87 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.59, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.049431107042371025 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.54, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05009082659620332 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_1.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9a303f8d98142f213ac475da96a3ef3267eb0345 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.53, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05016135580465919 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05024183937956912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_2.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_2.json new file mode 100644 index 0000000000000000000000000000000000000000..549c94da5639b4b45b666302cf7a59d939a44c9d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05024183937956912 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.47, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_3.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2f33d592fa951097993cbb859f94792ea7f7c4a8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.49, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05024183937956912 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.050211673156867795 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_4.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_4.json new file mode 100644 index 0000000000000000000000000000000000000000..7b04d4dc6e74445659e0eee5fb02001ad03edafb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.48, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.050211673156867795 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.45, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.04999999999999999 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_5.json b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_5.json new file mode 100644 index 0000000000000000000000000000000000000000..739e799cde70723e5f27c4a22f6de90078bed498 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_copa_plausible_alternatives_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc": 0.46, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_stderr": 0.05009082659620332 + }, + { + "task_name": "copa", + "prompt_name": "plausible_alternatives", + "acc_norm": 0.47, + "dataset_path": "super_glue", + "dataset_name": "copa", + "subset": null, + "acc_norm_stderr": 0.05016135580465919 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d2370d6e80ef1e52ebc84afb1c3b3c2231085167 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 2.7137587007909403, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.04289535874730102 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.23420804927860228, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029343193986469574 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.368972333302577, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0033242062731447717 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.27875544176267436, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0030398850526226597 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.08419089263107597, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0014122078430193529 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.1301132540842083, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0019763854755703455 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.0995456556042845, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001576544642160748 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.18337042886990745, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0018758190997753684 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.3011863301059313, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002348188787579532 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.22171841937262945, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0019873580895336043 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.19284679447481895, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0023339033454095755 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.30684388196998574, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002638945933468923 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.23024990094522646, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0024068830580772417 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..10021c6904c8793b623b9b3ff70da079608c98d3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 4.776883166568447, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.06188581593802797 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.32376895607264333, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003559481233250432 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.2766644901218466, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003006971082101755 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.2834915331439006, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.00286004072375514 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.11422580842443067, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.001981771261667616 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.09502385216159488, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016087538033057863 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.09798530798331949, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001591303023518874 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.23647808916387403, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002785237759844657 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.20111037531455733, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0023137520458429714 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.20604394935503653, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0021945968399171094 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.26668028425156176, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0031249451907287393 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.22678921457291987, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0026010547211686662 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.23271534495205093, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002496962503511146 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8a452246774f1ddb12eb78948a003d43680da265 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 6.367232170877392, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08549395921417903 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.3773758975465212, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0033199301919147046 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.32188781676797334, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002721512158082673 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.3289239334300976, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002499918188495706 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.14546628155822858, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002076024275324282 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.12068762525775042, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016561296699671543 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.12398725975487611, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016074904321220344 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.2810377453951778, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027020897811990126 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.23833759611575786, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0021523119739722788 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.24355724501015555, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0019784432789600953 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.3138640600673944, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029811254616978994 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.26690385060781147, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0024253237610957243 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.2728731326289227, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002250126526299403 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..725ba3ec41a53389a7a71c4a3b1bb3aa81380faf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 7.254477332274835, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11215533468282095 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.4071643250852113, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030918195348976214 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.3384520664356995, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025658424963617885 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.3494166954536371, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022556130694930294 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.16367687467995234, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0021599397295835646 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.1321193097997345, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001688691833299027 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.1372251409323741, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016327922809273173 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.30714914534792875, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002654116948033462 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.2531238455203022, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002079981518486239 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.2616315622148955, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018615767022711104 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.34078206920893017, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002868278542304959 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.2818084570295315, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023069075160404608 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.29126143372342, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020762541949325723 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d0ab4d151e08ff30db65d4ca2394f2781023f96f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 7.513333905948323, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.14766226863484086 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.41870474769317223, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030519614549617937 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.3420315436967001, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0024879203984106034 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.3555881239462119, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021502692030659297 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.17051356574406315, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002206652366301817 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.1358976585047218, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017083541046320008 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.14162675325545648, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016441659153520706 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.3174787632627764, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0026395803173011943 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.2572488981524024, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002020155375464557 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.2677207737523752, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017913031037284717 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.3514161137133654, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0028576511858666056 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.2859476044645928, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022615077541335332 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.29745641696384, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020148180914382 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d7b6752efe7d6fd2e0df9e7287285b030714bb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_coherent_text_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "bleu": 7.595896684409467, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11394951713016016 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_precision": 0.4267054087992628, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003104273111695704 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_recall": 0.33916614278424423, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002448118122778936 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge1_fmeasure": 0.35658924039746537, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002154416981057853 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_precision": 0.17671699676015093, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0022460226448767994 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_recall": 0.13693271552714417, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017033794821993003 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rouge2_fmeasure": 0.14450895819387546, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016668350972141967 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_precision": 0.3255781091740331, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002685842607303053 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_recall": 0.2571008116825621, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020239439722082807 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeL_fmeasure": 0.27045280819438955, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018187716322355006 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_precision": 0.35879686272429967, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002902004869955475 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_recall": 0.2844216790439025, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022327718696866835 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "coherent_text", + "rougeLsum_fmeasure": 0.29917487251322744, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002029876079328482 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json new file mode 100644 index 0000000000000000000000000000000000000000..66c623f2862db4c57c29cb7c669dc282da670b4c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 1.5543129195281387, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.053154855082351425 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.16696716992161004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0025805192223558792 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.2572825872031971, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.004212164493341407 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.19711096006674347, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.003040616043574641 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.04560883470771521, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0012021308966618743 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.0769178100374361, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.002128049838328794 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.05566703607474666, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001468465064769846 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.11835106119201162, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0014461966600140647 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.1833920610587413, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0026168954114977718 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.13981728919468525, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.00174661286699012 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.13701764629359647, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0019913632568671976 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.21021220612205108, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0033048732121641264 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.1613532463046808, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0023500023254129318 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json new file mode 100644 index 0000000000000000000000000000000000000000..46dd87a0a228439acbf3b88a9e8bcb06aae62fb2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 4.297512923603484, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.07413629268869817 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.3020908787512572, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0036921538937666133 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.25987043503346535, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003141567879746835 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.26520071914836685, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.003003658166661617 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.10465034015071716, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0019691347076890445 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.08719562064392963, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0015853267824143741 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.08973585628096593, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0015772173702039572 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.2204806269646829, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0028685459881547795 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.18825101434769329, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002377652096200304 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.1923500412945955, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002278192833793113 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.24946111667640178, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0032258053795631643 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.2131990405760118, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0026891365363325335 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.21805867415782276, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002597589024739917 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json new file mode 100644 index 0000000000000000000000000000000000000000..d12d57dfe2dd8b611beb21c51ab96c49aa77dbc6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 5.867162895941705, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08645931541468421 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.36350270495948706, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003355398168023335 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.30887853999055825, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002783892987479659 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.31591363153481733, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002562871100672284 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.13698348670528768, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0020426287273337175 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.1136012858172398, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016553465039351092 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.11659264308512794, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0015973372961016513 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.27091169751519845, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027121576953348142 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.2289429812438254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0021934381495222347 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.2340979483687512, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002014832984376413 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.30254398731438875, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.003008055599147345 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.256276398824361, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0024731203037475064 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.26219793663870944, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0022935285784009335 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4943a996963c67bbf71915c404c775ac1ff22a3a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 6.900082747494956, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11320138722526062 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.40185459499224246, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0031344547603992636 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.3286363837625974, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002532026641586088 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.3408847326310388, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002233633388817911 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.16009806566984877, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0021974508251131177 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.12657236948027373, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016617454893539316 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.13197675444985899, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001607348415179031 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.3052474939342758, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027064485265094005 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.2472552075503705, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020502376164631794 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.2567441918347381, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018394022581844677 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.3376547544199217, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002925952642220336 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.27426355732116464, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002262112871842013 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.2849098056411087, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020490356453933285 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json new file mode 100644 index 0000000000000000000000000000000000000000..c0545c31cb0cdd48970000b49c7caeccda807ece --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 7.320031747652725, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.12923534694654085 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.41515066909927334, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0031037936370394118 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.33321944407915566, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.00246664110665828 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.34908186170828154, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002179669059327166 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.17014937408764044, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0022566453563309186 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.13226023424874878, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016784568847184593 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.13929100917815865, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001648417738388695 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.3170205721100898, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002708866680963841 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.25219110868882494, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020045842681778495 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.26448509530378633, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018168804060454762 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.34988097982765304, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029179543208130904 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.27907105999460174, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002218854839510192 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.2927886602264322, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020229110972793574 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json new file mode 100644 index 0000000000000000000000000000000000000000..814b97fc3c7b7b599d130c858a9279306a2eb8c8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_create_text_for_me_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "bleu": 7.579729216459941, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.10379442969910549 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_precision": 0.4245878302927168, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030985552342163206 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_recall": 0.3351370843643141, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0024378280020133556 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge1_fmeasure": 0.3538095251081614, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021545871849702673 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_precision": 0.17516980829315892, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0022649531690162985 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_recall": 0.13470386962627315, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001692225627937733 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rouge2_fmeasure": 0.14278771616071478, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016729015640305864 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_precision": 0.32504959215644774, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002692604155146814 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_recall": 0.2547905097073822, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002009386966136313 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeL_fmeasure": 0.26921100421071603, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018239111737229203 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_precision": 0.3579093459662674, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029111349234790906 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_recall": 0.2817445669200069, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002227797407847849 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "create_text_for_me", + "rougeLsum_fmeasure": 0.2975768458732384, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002040211020017694 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e612a02086f2a76dacc440cb21d6076b86185c4c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 3.357880740552639, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.05911525500616559 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.15817916976766455, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0011411953748066586 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.1449753921655427, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0013358740390814108 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.14809698368434102, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0011506472021475076 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.10846978571330382, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0009492436851693913 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.0996008655703363, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0010665340200279952 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.10146880175157826, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0009392019833054901 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.15815833643433122, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0011396842377164138 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.14495687364702417, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0013348745641402115 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.14807737584120376, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0011493153027645868 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.15601081480161114, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.001103835539038039 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.14288832426456227, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0012946708510962023 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.14601036905242729, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011113659770113293 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..734dcaa4da3090bec55c0bed6ac29aef7c19162e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 4.2427683279460675, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.06477494709625758 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.21918225259331264, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00210014169544477 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.22082556498109132, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0024742493122335047 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.21181656379511446, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020085926283200336 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.1163871008313897, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.001129914509168705 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.11332262657648863, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0012724108998245058 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.11107876311211701, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001083915673794437 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.19106349917495133, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0014505997962077457 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.1894466427618812, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017589499235787709 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.18333822193791077, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0013426559404332522 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.19958112129901742, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0017237243835404959 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.1990541967175752, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002038778096964972 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.19201664569930915, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0016232028917454438 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..5aa479bc30d7ae54a915d2a96437f9bc9f4734fc --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 5.051230966031554, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13591434741266697 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.257733606387396, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.002854189812923714 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.2484296444250448, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0028744090194746633 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.24111306272957858, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0024738591515231123 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.12122028894875721, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.001566218681260604 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.11563987732519745, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0015422732429049785 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.11330107614177073, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013577176314569378 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.21371701405030952, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0020806238798557787 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.20335467777797223, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020284517357400562 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.19848666892850747, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016655731215332195 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.22672124933887802, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002428427353019781 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.21696666131837813, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002404135899742133 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.21119700236219294, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002040241369573437 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..315634dd6b6cd2b39125448b086edadbd3a72b56 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 5.994293739144544, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13601894534552475 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.2862040020667472, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003238321483446096 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.26830082610544254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003078316721189308 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.2629515004088073, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002731946803977037 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.12802276569983448, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0018589047775403413 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.11943444822480005, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017609621427282169 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.11772014214137563, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016007488021205615 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.23037063366617458, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002402866496678994 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.21411130784418406, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002232311335363655 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.2105240941617669, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0019150789004942635 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.24519320098691047, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0027928419904608157 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.22888580022427765, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002615628628359519 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.22458434070337602, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00230510570165974 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..3a19826e7335167c614a0c8a2877daee03492f05 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 6.295897046683775, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11584821440017962 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.3075285202414195, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0033945393939165606 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.2801004454252903, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0031191985284490546 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.2770259703392387, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.00277806824406267 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.13304703967240197, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0020060838809736344 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.12206189669119812, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018573319105401854 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.1204704109621881, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016880105990733003 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.244640019205779, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0025913691710492746 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.2210765996441547, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002310185867597945 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.21909517879981463, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0019961120480864922 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.26210204661833736, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029901819656978864 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.23756462084158286, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002680022085644319 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.23521486729488383, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002390395310603108 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a0eb7540551e5290a0e645b42f55b3a157d2d34f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_gramatically_correct_text_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "bleu": 6.703087166294118, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13595717497233803 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_precision": 0.32943509738961013, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0035748420223814196 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_recall": 0.2922771657368818, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0031239846598688573 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge1_fmeasure": 0.29157203264366427, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0028155194363036675 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_precision": 0.14104223605836952, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00213159571945594 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_recall": 0.12618593841822234, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018876455590173392 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rouge2_fmeasure": 0.12548313742099432, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017504707119138294 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_precision": 0.2584532209858507, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002742447042465613 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_recall": 0.22794142525227126, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0023233899066376004 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeL_fmeasure": 0.22747557648943484, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002019174394965523 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_precision": 0.2777892174460894, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0031494031496789106 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_recall": 0.24548258109140436, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002697258051680226 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_gramatically_correct_text", + "rougeLsum_fmeasure": 0.24501087162389928, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002431781683361551 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd7325bc4ffa91cb495461e350ee1f321682007 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 0.034167755941272124, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.007307628734226665 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.19301807890145306, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.001670641964229549 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.1302779588596268, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0009550048156890856 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.1471748484883756, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0009958043221996186 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.0024668841759750483, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00021558605140782248 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.0024274910482947904, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.00020167632183355254 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0022741562354756397, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0001859749506718914 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.1613842777209177, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0012980656999503429 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.10906527422415299, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0007641988419152662 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.12313336484112318, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0007638110810765412 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.1737032149967068, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0015574347646916537 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.11681049061765152, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0008822320800065422 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.1321175572520423, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0009257711780790996 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1ba6e869ad13ed62600cbff8ed68055c625951dc --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 6.480953332643209, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1333798473522777 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.40802697478059297, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029673602294223223 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.3261039583636633, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025124189387771036 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.34215503945412495, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002167322468153194 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.153945929557076, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00209119791686883 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.12079622988004886, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016791741735485287 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.12715929154890004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016365489693603613 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3007493740437511, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002487305156158438 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.23850764812452344, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019836094262200904 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.25061818922473783, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017641406764575026 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3365871813932031, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002735631481473113 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.26820407020047227, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002246895665921754 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.28167871848928877, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020153231718161705 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b50af8950b54c79fd6280efc667413ad2c47686b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 7.666105297947792, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13136205041430496 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4208785617206442, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030817416724062204 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.3424204393428585, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025248373231055543 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.3560820001983814, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021795777882845308 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.17233048865607528, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002143190552622057 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.1383316167107616, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017503067979860562 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.14362551846595048, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001639558679939512 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.31589969893422243, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002592218877775354 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.2566141330684389, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020689228041648397 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2663812276115756, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018062222315789584 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3509539248702352, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0028422730753440103 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2852154582143963, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023210148627271433 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2965891442412763, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020592217464907307 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..92f82f9b6d37cec7f3bec61478ec95c1a2ed0e0d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 8.56058254286114, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.15123807567499506 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.43327921520363066, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003097568614374566 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.34769690369124, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002543918652030485 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.36308270867669845, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002191649011082704 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.18895843732454584, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002286888650673502 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.14872848902577038, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018036895683928664 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.155349432200804, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016980107119711429 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.33501338216238535, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027058006091930545 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.267043780702506, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002094068308249151 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.27893233629167613, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018361143289552099 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3685423655496754, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002930111403674498 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.29466685169867624, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002347617054544211 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.30795704419792747, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020858121305838396 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..bc52a261eb4af7f5c01c1e441851faaceb7cd83c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 8.949311991420602, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.15855472500531886 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4375804506933646, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0032055526063119576 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.3472946902285877, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002539437414200631 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.3640451950801677, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022138110432792396 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.19419957071290792, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002374016494731067 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.15145528896504676, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001854581130642659 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.15872608958193554, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017627421907090207 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3396411151186514, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027863819229066904 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.26869178202876687, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0021284030057315906 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2812482066830368, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018755893236809696 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.37367146023180203, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0030307585676874636 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.29617959760674445, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023937351643062293 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.31039320669801307, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002134177354316102 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5a3985691c2160c81523359bcbf14b7ea0abc9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 9.131873104913453, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1609494007900667 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4394755159466518, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0032068250726382916 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.35023402614950333, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025006999002648547 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.3666709721169093, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021727553535898286 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.19707980839947012, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0023944009918063663 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.15380875597263907, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018326439588638127 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.16117969333074014, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017520909045254573 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3455815272270704, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0028520716467400477 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.2740572130821665, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0021232292917631344 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2867364556172849, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018811550503636599 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3778806601818275, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0030588224104805 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.3002582190319247, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023482445311229245 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.3144340022223061, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002098469397474867 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_0.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_0.json new file mode 100644 index 0000000000000000000000000000000000000000..406f85aa284eb0136682d3f9f1c628a5d6a90d19 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 3.1072073402263745, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.054847709586604974 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.2904834399201357, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003209479431805363 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.378959200854369, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0033573634989133632 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.3192562818462074, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.003103481656865381 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.12493081488647073, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0014706683187466658 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.16664906976511587, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001807700709560424 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.13831925575429133, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0014991664215535973 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.22666655050825654, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0021721413918810626 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.30700944872644476, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0027337619577674933 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.2530898959654513, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002233323932714259 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.24444049681203178, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0025608900739077897 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.32402288867669254, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0028828379788400547 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.27039500281410955, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002525774163861117 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_1.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_1.json new file mode 100644 index 0000000000000000000000000000000000000000..294fb8491b46fdd146b6e9f4f4c66835a3fb236d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 5.768904743696091, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.07694130893367676 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.38598841646744675, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029352063511176927 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.3120167715681631, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002282231208925737 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.32587000405265004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002055881315827058 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.141089766958126, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0019905359417947705 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.1103023596204418, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0014920441369873467 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.11624053272466801, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001498193127795664 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.28626570995375356, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0024472272782447135 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.23012511993056003, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0018339215891965562 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.24041537230720197, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.001681675906589535 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.31879426513272485, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002698431810379339 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.25680714373294367, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002060514880618308 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.2683840002761712, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019114719552552634 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_2.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a2419e3997e23d73774c649419631ef1b95828 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 6.623187234910589, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09284748268311233 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.40593215484811684, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003106340108516954 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.32006154223100713, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002324542554838695 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.336751496891913, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002100952186916202 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.15978879415281969, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0021982896081617956 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.12215543191803664, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0015773240400394131 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.12917141922678907, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001569747986831894 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.3108908870060753, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002663105799005185 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.24343947890677778, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0018898553924389825 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.2561811353185624, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017410449710564626 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.34104283897670223, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0028793434150954354 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.2679333081128248, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002109219796922622 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.28203219973113697, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019575989714408007 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_3.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2b6f71bfc5f117aaf3d0a09d584c9fbcf0fa0b5c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 7.009840500917634, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.16312381322545783 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.42255037024132486, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0031250780400183006 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.3217085619415695, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0023676195043913673 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.3430320576782421, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020936920124836156 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.17364077254123408, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002316697656460501 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.12769222375851083, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016388892071143308 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.13699349439736094, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016218763137671342 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.3285519495441955, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002763683063352353 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.2480739442674065, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019648687514971755 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.2647497003936544, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017804230943033162 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.35718768502576903, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.00295212627343816 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.2706627854942478, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0021634238449362055 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.28884517699797746, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019739366315454183 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_4.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d3c5da03a429a5bcf478d66d75117ae1758f451d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 7.107067863801302, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.16706105397816348 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.42460043468036757, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0031591950135965727 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.3214789344478919, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002360243252337688 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.3436390706497596, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002120851612678163 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.17482332505179643, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002374641561754068 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.12745182984114253, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016392080775069744 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.1374084834705204, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016581364245938277 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.32947339330122327, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.00275863160551055 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.24728616508687778, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019282863931353905 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.2647579237213868, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017899579331400889 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.3590563916220248, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029587623344615264 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.2708954741926685, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002158968315143282 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.2897245093598933, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001995382121885309 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_5.json b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_5.json new file mode 100644 index 0000000000000000000000000000000000000000..a7b6326ab9a75a1ebddbfacabb5ac046281c6cd4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_e2e_nlg_cleaned_text_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "bleu": 7.274383164727865, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11122428262789567 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_precision": 0.43124377131693664, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003211030428742271 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_recall": 0.3239350199889235, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002364971988949829 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge1_fmeasure": 0.3469575519675918, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021034752646403756 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_precision": 0.17902835655762828, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002400473530717413 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_recall": 0.1292480377318174, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016477268883042123 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rouge2_fmeasure": 0.13963971276485945, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016673499985951031 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_precision": 0.3361429661493787, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002859024482019008 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_recall": 0.250039081212901, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019599039144951795 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeL_fmeasure": 0.26834384059308486, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018122635157761808 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_precision": 0.36599409686858575, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.003031621767071787 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_recall": 0.27374374119186806, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0021570988272421876 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "text", + "rougeLsum_fmeasure": 0.29352695616349106, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019956203711713662 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json new file mode 100644 index 0000000000000000000000000000000000000000..476ae45958003b74e591966eae9e5841416ea547 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.08690252895592265, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014696237802313788 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.21076760136855094, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0033990058849462626 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.12154953382545712, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0019861476396373216 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.011835058486175484, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006782998894859348 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.03014924646277289, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016485838218151638 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.01678428011344468, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009370052903803109 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.07649508725038684, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012036928723305662 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.18622709296396378, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028294388338103195 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.1070578662991953, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016219964231111727 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.06936681100471268, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011946703655621098 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.17032808205023134, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002899791797120825 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.0973663233955277, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016371882205093247 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.6380986430759009, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09497300417280843 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6bafcb07a34eb81a93dde5d135bc4ef6fad36ff9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.11321015578844655, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0018833111961485554 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.18538730900428743, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0037105443176058024 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.13006038778542497, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002066345113965779 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.008030799491936448, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0005974414949521437 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.01630658025640146, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0013333260688818906 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.010073796253903308, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0007521556541790673 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.0852182314237414, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014284391091660574 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.13603732951225597, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0025190738181756775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.09662643610919455, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001430668974086877 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.09002789354250691, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0014892889301457962 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.14770301407061567, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002942468251426108 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.1033309158326748, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.00161329986842399 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.4956718515669784, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06144134252581695 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json new file mode 100644 index 0000000000000000000000000000000000000000..db3b06077e5b1e1d5df2b3a42c357fce374bf867 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.115303289612093, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0019631897374271976 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.1867727451057763, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00361199337825402 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.1321353013501063, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020733569592054284 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.009371984198219677, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006185339230947803 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.018833588882257533, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0013508882388750508 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.011705506769985538, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0007650466477809864 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.08671980544288431, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014435281648377727 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.13855429960506976, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002496409762111219 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.098676375925058, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0014409776012497158 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.0903490116138123, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0014955778687851201 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.14710100552982236, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002836465397213484 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.10364656157003836, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015811434930502115 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.4649248578493213, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0713213099450011 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json new file mode 100644 index 0000000000000000000000000000000000000000..af2bc1eead2170d64ee37879092628a4653cb579 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.11030819972819507, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002126312863119683 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.18142324171289778, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003822441819750713 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.12726617865153672, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0023059251743363816 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.00966434257562582, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006608656525792718 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.019946294126745322, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0013598720985991016 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.012326768637832572, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008208778930401783 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.0829643367413115, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0015835310219049267 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.13470266239538448, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002713889010739294 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.09508927308463527, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001653234972150023 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.08664042360478007, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0016238957338166923 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.14392387149210184, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003047831244734627 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.10033551040789061, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017901125661630923 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.6897184873606464, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09929118591535131 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6146e756b9e806019a64f74efb4301f3ed9a996a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.03312872880982003, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0023364937989206456 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.04331842400385379, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0029785608067230333 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.032825069850753535, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002115896509151276 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.004031880720388147, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007055581272082209 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.006940542757637712, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0010097939996129512 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.0045358862257299385, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.000680391156202852 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.02612465010671101, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0018898939348251488 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.033505823824258554, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002305244406205452 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.02557068204061304, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016585142146009432 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.02683188172054129, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0019196788856535153 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.035158918419347346, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0024488884244711472 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.026530666924173267, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017221175504394289 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.18440179296896278, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.034342854775642216 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3afef514ecc0a2e07b0b163ab045eda16e181b5e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_boils_down_to_simple_idea_that_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_precision": 0.003430531732418525, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017130559457731933 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_recall": 0.0001996386258755556, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 9.987616180882829e-05 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge1_fmeasure": 0.0003772463816858933, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.00018869438087139786 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_precision": 0.0, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_recall": 0.0, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rouge2_fmeasure": 0.0, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_precision": 0.003430531732418525, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0017130559457731933 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_recall": 0.0001996386258755556, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 9.987616180882829e-05 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeL_fmeasure": 0.0003772463816858933, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.00018869438087139786 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_precision": 0.003430531732418525, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017130559457731933 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_recall": 0.0001996386258755556, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 9.987616180882829e-05 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "rougeLsum_fmeasure": 0.0003772463816858933, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.00018869438087139786 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_boils_down_to_simple_idea_that", + "bleu": 0.0, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_0.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0312dd9f95a76320fbfd3b768344ff3a4b68c036 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.09636462071540756, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016678733192933997 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.23676664119512633, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00385324499120598 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.1353054364687284, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002260494132460735 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.01612079371257744, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007668663392734058 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.0407993478012527, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0018811374457448548 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.02280597280652988, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010663990484613007 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.0829190298668844, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012981572345846506 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.2049398850561697, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003055116119810891 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.11660002475056051, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017579801450236503 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.07393255459872417, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001314680148354152 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.18411144544228827, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0031904748886971623 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.10415585675049703, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018023677161240045 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 0.8587282218390164, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.1092988209328534 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_1.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a478012992e0fd2672570c6b2e713ca7e6a43d77 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.11994693258726821, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002272664862037966 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.2063208707620612, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003701405279713943 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.13999313273083266, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0022002140826437623 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.01473220123107267, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0008753263893803662 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.029347661070468774, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0015701480680528376 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.018246524077524105, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009676889512530071 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.097949771945427, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0017742187935061273 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.17075558002497426, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029345772747996486 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.11473299946041993, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016673962826521769 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.09414781020562817, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018145520881081784 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.16167126991391795, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0029104592060735126 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.10940608132454639, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016933691201315847 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 0.7674350493376833, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.04722261703727283 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_2.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f54515c8d707d81a60425c3630a011c603de2139 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.1272859637960522, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0027462425850244753 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.20726108396447318, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003480174389659312 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.1432459780290282, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0023213704285313403 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.01752090200988848, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0010115578307486322 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.032264766808514124, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017258708804944215 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.020808658216032574, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.001109865173400727 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.10342334775733361, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.002079516722716056 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.17360666431954774, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028928707472305787 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.11786898777293418, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017897922596018406 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.09983622506749402, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002115393492482011 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.16461169268640363, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002819701566946129 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.1127637907926372, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018017461138697946 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 0.9152942535263101, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.11054552948281171 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_3.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_3.json new file mode 100644 index 0000000000000000000000000000000000000000..809af18b83b79ffd4fb5bd69c44a26a4be516c02 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.1289330354185616, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0030022141554972227 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.19771559769715022, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0036123774127182606 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.14064423672321408, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0025293681234337334 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.018171280861945326, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0009931234243246655 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.030785833829197685, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001613050728074892 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.02056912761205674, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010455547309245617 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.10432784481750776, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0022625245110963936 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.16542514116729834, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002987101666461712 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.11535113963082101, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0019250312942772209 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.1017214982197284, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0023262186712280047 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.15836180866185806, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0029184359176326665 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.11133583672050468, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0019306189975949863 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 0.8295010385712954, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10729215881030962 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_4.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6d32added6451326a5241eebf0f6ace11c71d40a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.04257852396991355, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002914976483248318 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.05044280044044616, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0030109427993094294 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.039198145576751886, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0022897766752655114 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.005213726639706752, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007421737476799913 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.007806425202197695, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009097455571182794 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.005605133916804629, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.000674983978729807 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.035672949988739856, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.002547892797474333 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.042169944453916135, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002493645678486285 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.03233599361611141, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0018540861576038941 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.03487509346325613, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0025355122945714846 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.04044744995699889, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002420429517483526 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.03127361667218934, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001818299978680775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 0.2878314198644634, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09522067958952156 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_5.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0fed6ef7ea1e5a1e814cbfe2e6d9c1b6b9f120b1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_DOC_tldr_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_precision": 0.002936256429086033, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0008325661288169507 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_recall": 0.0021595962684425826, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0005981401663289608 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge1_fmeasure": 0.0023996080385096956, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0006585056185108358 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_precision": 0.00041274393383161417, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0001973715747410765 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_recall": 0.0003399001109877913, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.000164471497611397 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rouge2_fmeasure": 0.0003713888095226837, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00017882541746315305 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_precision": 0.0024675512225894843, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.000714409976176186 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_recall": 0.0017999664297352442, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0004895093404742078 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeL_fmeasure": 0.0019987192664100155, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005427413769701772 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_precision": 0.002632861772195261, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.000753763054701241 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_recall": 0.001937003366161748, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.000531587586598376 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "rougeLsum_fmeasure": 0.0021468678403871737, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0005854611584876265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "DOC_tldr", + "bleu": 3.539919827428958e-43, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 2.1011888360679252e-37 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..10043f068475894267ac19edf0cbda8b04be5ac7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.10993607957935606, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017262014910658778 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2671733452707944, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00390137507010999 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1538448102106363, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0023116204942085577 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.01835130554145112, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007993797195400837 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.04604209148970363, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001973243132477277 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.02588589928345875, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011070282134188164 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.09053264538484676, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012769612144616453 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.22129938057010246, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.00293788000817368 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.12685128021061992, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017022653082941958 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.08513932826931309, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0013864967970779066 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.208617316083849, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003259825355111851 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11937933477851356, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018740476749061043 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9574482759158731, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08037361986575367 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5175bddac02cb743527c92ee2b89c4a2eeb954f9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.12072537979214573, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002167454500355592 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21838713615156446, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0038259542032858217 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1452725178193069, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002296281448425658 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.014777621125809935, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0008637330993774701 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03154673810471188, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001733277417697726 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.01908194699059756, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010479813934091 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.09551583464454579, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0015465079617143938 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.17601089949518756, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029605535227822544 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11573111078031734, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001668882154190853 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.09358138198842439, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0016368426037437229 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.1713680364199079, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003137326232956779 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11300827952303219, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017954449161295192 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.8464028940550716, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.12057452438636702 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..535711b704cf5ea465aae4275f9206fa13794a6d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.1277087568466237, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002600936480819687 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21240423763263364, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003539650124058849 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.14627685627701034, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0023199421629749627 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.016452983669055295, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0009373447546795308 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.031084003624943502, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016581096554683838 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.01990450612056915, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010553925331886055 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.10316942850814137, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001971701438481884 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.17539290041445985, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002825968625575816 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11915492784443057, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017454981401993368 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.10014675747700362, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0020339193953665207 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16798366392500885, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002901019287841266 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11487832313530352, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018236145830277607 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.7984106419011859, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.1063368416287446 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..585994fdcab4b87575da79b79606839777392fe8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.13645821087841936, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0030874846619215407 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.20065597941747373, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0036985721066628736 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1467279970360384, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0026790925158139463 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.019488958963057586, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0011775452187260563 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.030961861144012604, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017081744279510592 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.02153589403065349, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00118190969520155 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.11007763632391766, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.002410392914612521 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.1663257674332732, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0030787485034055036 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11938767343543244, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002073060283013559 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.10704427774578769, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0024643770024296028 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.15838825460379957, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0030224100131612764 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11495841768374035, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0021071380507211635 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9423552122227806, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.13782619268270152 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f86dc7e6b243041dcbc0cb35af38c864d828e465 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.04085291524121532, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002589850198863279 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.049526509450874855, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00300127990043923 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.039591317930521744, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0022931685649421212 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.005319857832468002, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007156080942971114 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.007574332469978282, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009017550685095564 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005510795996651232, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006213462695311744 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.03358813626286002, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.002206945445578637 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04060546427390794, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0024697862019406065 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.03214999836248129, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0018423539820022828 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.03309676277901219, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002199818877733498 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.03950562122200865, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0024124824570383946 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.03146184764090875, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018112412925546117 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.17189105743351635, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.038682318528850144 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c6db5dfa47d6714763112d74e0e7b051b5c068c3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.0035305925957395084, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0009785490984386396 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0030440980529733517, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0008638602364047491 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0031357253217098413, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0008645982383809647 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0007350839134967882, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00032922794052150407 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0006963162623539982, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0003353629065457281 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0007075672984960149, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0003298891212277737 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.0028741250344439784, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0007961665239105527 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0025236529245087863, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0007285140262655341 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0025693656488664164, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0007105243349554819 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0030230196408857547, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0008304280978614765 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0026492658288523934, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0007552587612647047 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0027055779382418573, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0007426343024247237 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 3.736787794674515e-39, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 8.045576123348552e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_0.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_0.json new file mode 100644 index 0000000000000000000000000000000000000000..bc7af2d8241f6920eb40c3a9f6234e7972718060 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.09630367276935313, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.001534883053115542 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.23460591986325105, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035249062730580206 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.13477841342912497, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002063346031228702 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.015863901454312258, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.000709892449539515 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.03990372817053524, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0018147573204489273 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.02239979264930804, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.000997880305154405 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.08476913174599572, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012339605708654188 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.20729064806304343, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028798441356743133 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.11875101756591155, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016571866332250852 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.07482920590283132, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012132438830642453 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.1845581778085984, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0029597709556143095 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.10504435524250771, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016540156460037813 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 0.827022947248256, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07927582653684777 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_1.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1900b4c22a3c3543a39a8f37b427cb1575933b8a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.09967812302039447, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017535656187891884 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.23532487213764122, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0037931530072197895 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.13585887003860755, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002175476639683441 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.015458406614911558, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007847122204995969 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.03955801765769148, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.002028121799206321 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.02178998612851725, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.001087232464799662 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.08499045094039762, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001391372178337716 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.20179056951969515, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003044247136206819 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.1159863670452197, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016981881580244976 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.07807531947048092, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0014381437479792241 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.1855216996322975, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003180975541626223 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.1064016446915024, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017712110403532188 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 0.9250777542928637, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0995464024191013 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_2.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_2.json new file mode 100644 index 0000000000000000000000000000000000000000..62b0f482e8ca62894267468425ac913cbf1432fd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.09893709052188565, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017414703745454985 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.23440267735863968, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003498471803075341 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.1349531450843222, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020232857713247897 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.015158067141100342, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0008079828861487166 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.037206559662769104, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017788028367721072 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.020699762711342613, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009604579863830597 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.08507459833948786, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0013961283150575271 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.20301299123076172, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028262793046608258 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.11632088160061539, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001583707769523938 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.07692952970836399, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001416058779535236 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.1833856652655966, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0028828605595491133 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.10493620226111898, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016118116058243702 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 0.7540448660432285, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09539115530526798 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_3.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_3.json new file mode 100644 index 0000000000000000000000000000000000000000..986eeda72772c3c8f3230c0368cf7041085e44e9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.09822153128767802, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0021428329923455346 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.2173704671442931, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003785275093896278 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.1288656936450777, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002285142640984343 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.015471263083193475, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007775392244828585 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.03648134817348442, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0018073965169390764 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.02079595560428378, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010061139041632509 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.08524370672290423, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0017456965733000762 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.19064982924857338, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0031548397347180086 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.11230658338330175, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001849039452146557 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.07709178118688563, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017340953216017302 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.1714803560163439, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0030653620428528205 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.10101095703467147, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001798939254091859 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 0.8519606095372889, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10081770314780626 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_4.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_4.json new file mode 100644 index 0000000000000000000000000000000000000000..cc548edf69f4da316ec903c9bda8233552b438fa --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.03689476483753298, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0024463384773361955 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.05735145180342211, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003370593307800384 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.0392783960707505, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0022527501571849374 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.005453105497796419, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007237955340670446 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.009826935945674431, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001076383576002216 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.006225565560992794, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006675181095595146 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.030609500618293827, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.00201564752141265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.04862705218381443, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0027960780471146885 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.03280903331157746, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0018180599330376993 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.028977502639200816, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001997201119048309 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.04481380017813916, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0026573416129651475 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.030578597001076086, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017593294917667497 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 0.311490095229844, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08056516332123577 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_5.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0dc2eac0a373282a10b26edef9122950d63769f9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_DOC_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_precision": 0.0027508790221273424, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0007706396307316284 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_recall": 0.0022719827482217438, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.000646293142327662 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge1_fmeasure": 0.002421692802983106, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0006764738834041464 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_precision": 0.0004918476824692141, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00020999706938142754 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_recall": 0.00044210136884942656, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.00020463083076435227 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rouge2_fmeasure": 0.0004607621449381724, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00020508003752744115 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_precision": 0.0024136119947061004, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0006725506737183709 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_recall": 0.0019698513077900763, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0005402889527601966 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeL_fmeasure": 0.0021069927716341674, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005727525693181224 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_precision": 0.0024665686894058876, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0007035053623232305 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_recall": 0.0020393354574629054, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005858747974607231 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "rougeLsum_fmeasure": 0.002169328410821175, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0006128588314949923 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_DOC", + "bleu": 8.81729878858539e-39, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 2.936570585229035e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..98ac43a73f845752e417f6f9447ee48752eaf45c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.10015017289953904, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016276274188076708 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.24643756064476016, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003818429494069263 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.14062273915493637, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0022000708799639716 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.016701004797808148, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007833139485381877 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.04255250278955721, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0020010362587043313 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.02364976404549372, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010941164002693394 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.08541568567160791, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012793972003565766 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.21080805741967854, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0030118865157834174 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.12001163311402531, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017191781997495605 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.07735151668808841, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0013159109974304754 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.1920023167499489, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0032002989755440677 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.10884604959000686, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0017934244319739035 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 0.938533817434639, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06661804850792503 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..323be722d13cecf35cd5638125940cf7236892c2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.12052574025109639, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0022210016746994497 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.24517047230002495, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.004088354034068248 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.15267367243026655, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0024154109938806533 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.01753912194656959, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0009726986505547985 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.039922933252291584, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0019893681203709793 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.023239358291017316, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.001176157253837938 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.09629033131871302, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0016769870522745736 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.19827068105870446, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0031512156971516287 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.12230123546095711, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017734340030687993 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.09385616207958877, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001782715945523954 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.19236967599868202, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003399193457550934 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.11886986067012077, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0019377362527584576 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 1.0178456834586884, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09075195516691038 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fe4123ece265d26a6eaa721400397cd8dfab026f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.1290901025083865, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0025827449855700116 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.2345962358974404, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0037295243525013715 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.15283133102375288, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002341223307257669 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.019485063384407045, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0011129553201441335 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.03790567635691087, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0019138824398120548 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.02358829661609317, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011830288079803606 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.10570526084215368, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0020534248709442697 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.19497376652941362, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003002241641214748 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.12562572604910113, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017798211682304972 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.10165398752195147, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0021383205417301585 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.18432216882062125, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003052415691915719 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.11974611561902486, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018765281940315045 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 1.0056063433565963, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.12502919332145682 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..719de3818579ed15c9687a75f10b1cb50f237ce6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.13244835829967186, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0030908517791536955 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.212588522071666, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003862810884401867 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.14887450442636246, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.00274448239030748 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.021448284322085266, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.001389010296371262 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.034841891806055784, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001852154768566367 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.02410030299641933, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0013458556293409396 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.10749771032588204, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0024493472667949895 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.17604861924046014, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0031652710323202445 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.1217506880628755, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002161955887082069 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.104633566179651, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002509395651495409 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.1689633360044492, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.003201254184241733 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.11764148962890421, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0022216493805243694 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 1.033576975690131, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.10612867731517484 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..de5e0c8b9bbf1774eeb4c9589f9729dd39199e0e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.03956432039685865, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002695013853025962 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.0492235193132569, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003192852760395571 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.03807978716467659, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0023526154852758037 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.006014101598531974, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007964027992614052 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.008256986942409495, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009340954377351208 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.006031590914671055, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006648715199487534 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.03171603699831025, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0021853394627631482 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.04009236083483906, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0026034873427392777 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.030352247654815704, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0018302961576454654 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.031509274099563875, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002203992098896614 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.03906347934555628, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0025972716725576332 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.029887550353590413, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018411395918680518 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 0.17410955528806296, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.035584087070912734 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..999470886d0dd9f5ddc9e1ab19aa4d32b0caebf8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_gem_xsum_summarize_this_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_precision": 0.003716409376786735, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.001337034178653581 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_recall": 0.0005091066357522025, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0001976825729298653 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge1_fmeasure": 0.0008897900877168533, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.00034150756554492286 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_precision": 0.0008576329331046312, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0008576329331046335 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_recall": 0.0001072041166380789, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.00010720411663807919 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rouge2_fmeasure": 0.0001905850962454736, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00019058509624547674 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_precision": 0.003716409376786735, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001337034178653581 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_recall": 0.0005091066357522025, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0001976825729298653 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeL_fmeasure": 0.0008897900877168533, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.00034150756554492286 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_precision": 0.003716409376786735, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001337034178653581 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_recall": 0.0005091066357522025, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0001976825729298653 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "rougeLsum_fmeasure": 0.0008897900877168533, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.00034150756554492286 + }, + { + "task_name": "gem_xsum", + "prompt_name": "summarize_this_DOC_summary", + "bleu": 0.0, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_0.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_0.json new file mode 100644 index 0000000000000000000000000000000000000000..890b192b947f06344aabc028e5426d57339bab18 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 3.8125985335008306, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.19585723695627313 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.06195211374786627, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.002094672822781696 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.5162593447897136, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.007535474730992951 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.10026196967185438, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002577507472274814 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.04159189415204392, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0018730228506480733 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.34313550824678557, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.008028791094629359 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.06739212009258444, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0024788238669206223 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.06050388889344785, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0020837386360429886 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.5050572660910998, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0075928927820709475 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.09782216479158326, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002562059753885468 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.058339101160589345, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0020763508063761675 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.49022104146755285, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.007674824806892895 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.09420308480263866, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0025478112691233964 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_1.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_1.json new file mode 100644 index 0000000000000000000000000000000000000000..946c99c1c8c5651163d0fd28a3110466e34fb71e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 4.984115818676601, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.2014952793991324 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.19442793157823965, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00631830683765845 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.370171505230012, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.008099242120212022 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.18342088795857475, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.005818959285067925 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.11018136222612517, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005443725071980991 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.2408430767207643, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.007813135912300767 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.11307708650003363, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.005231782535257262 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.1830299827679806, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006160676873355797 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.36225372579005893, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.008159488239081773 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.17550095028177032, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.00577802693447086 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.18472309686765498, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006223758103180095 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.3537491479629035, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00808717995340113 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.17499439612022724, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.005797120916813952 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_2.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_2.json new file mode 100644 index 0000000000000000000000000000000000000000..42642b9222bddac29e59440755dc9c40ba388e11 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 8.796771634022612, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.3911316659754168 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.3383306969374818, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00792879108703781 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.4846166240136867, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.00804262853108372 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.3228173852707755, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.007403843294444907 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.2300011447301482, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.007377109399709124 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.3444986994568273, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.00824995631557637 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.22646917848202447, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.00698616733256225 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.3246607866009956, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.007877219184249775 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.47418408456836464, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.008158139222284562 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.31247494161086015, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0074228242548635155 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.32707982563239135, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.007911882392548962 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.46874851911407, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.008123276443221847 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.312822879065026, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.007434259241066784 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_3.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_3.json new file mode 100644 index 0000000000000000000000000000000000000000..5265b109262ffdad05a9c182f87a3e28a5dd5dc3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 10.92470649416081, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.34911211757447225 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.4066260252913565, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.008231212467132855 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.5342107425741864, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.007869279508511906 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.38818722809693845, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.00771433388675621 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.2880242729653989, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.007821913844444021 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.3925434030174727, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.008258713922522358 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.2818434589400104, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.007413903409954248 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.39159852574062887, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.008194163001101947 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.5241346096429227, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.007997099796142482 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.37744167318989674, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.007752920486516649 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.39413857987963213, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.008220017403685996 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.5206460681141725, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.007963384607750488 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.3780280451942055, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00775661536455512 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_4.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_4.json new file mode 100644 index 0000000000000000000000000000000000000000..1364c6946397873cfb326c868fdd522bb0a7a02a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 12.858154272179098, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.7278916577243625 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.4624335317026793, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00826557767550841 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.563714294146611, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.007612246963891272 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.4351861616924822, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.007752659407235736 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.33238154342689963, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.008074274590958037 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.4191511130266625, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.008174601438156475 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.32028893653738877, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.007613634129323394 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.44619870598373307, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.008266117528340293 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.5526110549048481, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0077829782817278575 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.423598898334552, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.00783537952283608 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.4499232867985444, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.008282275077255348 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.5499968165738798, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.007744452222402591 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.4248676289113619, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.007827628127897685 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_5.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d3a335405276c25e10fdb9a7330a613d87449d02 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_Correct-the-solution_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "bleu": 14.060734984477335, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.3178125511335596 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_precision": 0.48801414741034715, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.008239688228502035 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_recall": 0.5852280389043574, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.007442570769433745 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge1_fmeasure": 0.46127098559948143, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.007762781204476088 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_precision": 0.35639429216000035, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0080903487818709 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_recall": 0.4396664484838038, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.008127010865714702 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rouge2_fmeasure": 0.3436917448426498, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.007674129864826368 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_precision": 0.4725439297415101, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.008240735797918716 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_recall": 0.5746878552043578, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.007604121138500754 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeL_fmeasure": 0.45010094870717293, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.007834754180971275 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_precision": 0.47616423957106324, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.008273908357739463 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_recall": 0.5724908344656849, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.007578130144250922 + }, + { + "task_name": "piqa", + "prompt_name": "Correct the solution", + "rougeLsum_fmeasure": 0.4513631978392974, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.007835514039656419 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json new file mode 100644 index 0000000000000000000000000000000000000000..db921f629c7a51a94c532aad0c8e8d6dd1c02c7b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.49891186071817195, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011665796539540878 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.49891186071817195, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011665796539540878 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ac9caef89b46fefb9d97baadc137ddf4b89dba72 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.5021762785636561, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011665713661738877 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.5021762785636561, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011665713661738877 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json new file mode 100644 index 0000000000000000000000000000000000000000..3be236c2ae16d5c7e1f00195a04478ef517bb999 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.5119695321001088, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011662480968070068 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.5119695321001088, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011662480968070068 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8c1c4a2c67dab796260c282d1dd60f23d6356001 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.5032644178454843, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.01166557553076037 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.5032644178454843, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01166557553076037 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b612bdfab26487e595387d2c933b5eaf0a765426 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.5038084874863983, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.0116654857447468 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.5038084874863983, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.0116654857447468 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6139da5512ae5a0d53b2ece86f50632e326961ad --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_choose-the-most-appropriate-solution_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc": 0.5016322089227421, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011665762007194868 + }, + { + "task_name": "piqa", + "prompt_name": "choose the most appropriate solution", + "acc_norm": 0.5016322089227421, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011665762007194868 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_0.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_0.json new file mode 100644 index 0000000000000000000000000000000000000000..425bdbdb885a6614108d1ae57fd01fc8b52aa08d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.08868063793537338, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.010679205176766326 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.024079092011199548, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0012751225464276315 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.16382504234512227, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0033146808351779324 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.03326642877020731, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0009655071900921132 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.0026147181115056114, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0002787850657177482 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.01942570519259786, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0011345558550410505 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.0038833183768095616, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0002654112249994602 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.02173563771564973, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0011135246337014074 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.1525533994924883, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.003123796083077344 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.030279861281445327, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0008455385563357755 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.019933519868235043, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0011006754681286612 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.1373792671164724, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002829849172122982 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.02719812824801991, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0007883831967628123 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_1.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bba86b91fb2a650b302409c6b657345dd41acc28 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.13588134778013236, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.015459519670994496 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.05300894951369402, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0027301529015735723 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.1061507145383213, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0032149427593071117 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.04901346680697841, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002055865923271878 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.008494513980757509, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0008377205870598726 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.015925496135927367, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0011332549301939603 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.007982487413659915, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.000712887056916324 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.0457742343119478, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002346181762140086 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.09699249115638213, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.003006081235657284 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.04302281186011969, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018156151734925613 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.04604314004112248, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002446501769385043 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.090627933605973, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002845023971018631 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.042082706048624005, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018277105669673647 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_2.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2a3b09671d4a2bd9b702baf10e91dbd53d3536f8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.2131390370421847, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.034066817734553397 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.04766490997973889, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0027505872541556155 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.056720871691876285, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002671280465988878 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.03799621801376054, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0019833229325767424 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.007753824889172869, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0009180726582653088 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.009676078489313466, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0009719598996118394 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.0063412101839208194, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0006568616041856655 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.04049213061658682, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0023256155896140274 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.05059606948650782, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024394702454846825 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.03271614089031179, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017135896610323722 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.041656568079500036, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0024499403689278464 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.04892348784841611, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002356085346485099 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.03282280188970894, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0017310021914839983 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_3.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_3.json new file mode 100644 index 0000000000000000000000000000000000000000..831d82aa596228234bbb728e72a2a6f47938a2df --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.22997720223598342, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.02338425872757522 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.05031505431843015, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029584770445463347 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.04384933797682676, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002438097616145567 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.03710567348980287, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020180005367721313 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.008144343048337636, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0009881296494107798 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.0074638698626018, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0009315826019915539 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.006082833337651904, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0006978818850891022 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.043579152184238995, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0025694327719803156 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.03882153127320646, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002203760453572274 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.032270439068929166, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017622474773529432 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.04474778358521563, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002654591770374059 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.03883827376493022, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022045086521405998 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.0327651263489432, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001788852779448503 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_4.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a1fb9106f654ce81fca048ddc6dd202b1cb4accf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.2712297654901753, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.053042750349294135 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.0542226515580773, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030939110930176925 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.046451743768737, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025439882698671474 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.04059849587408926, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002182568730131035 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.008974181628774633, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0009729364560951758 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.00866884991169637, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0010298253089854248 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.0070696546789010785, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0007767095517004894 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.04715725401216418, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027156375130040103 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.040955294760240304, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002277246735797152 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.03526961790871762, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0019038989493982874 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.048027622411834725, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002785556905287026 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.040990146321799474, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002275559958385726 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.03562639655601933, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019216190005916407 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_5.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_5.json new file mode 100644 index 0000000000000000000000000000000000000000..efedc50f19c86c501617d4d2de7f63ee2b58f1c9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_no-prompt-needed_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "bleu": 0.1498156695877611, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.028223046286397702 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_precision": 0.05946021488215291, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.00327624600239411 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_recall": 0.04615385829211834, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0024475462608166006 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge1_fmeasure": 0.042735679103126284, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002181587233057925 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_precision": 0.0075902239127463664, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0008916116262057205 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_recall": 0.007204549919294013, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.000911423031900326 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rouge2_fmeasure": 0.006029826803091018, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0007150551789811505 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_precision": 0.05132083543992989, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0028482950530335353 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_recall": 0.04072766325284397, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002204434423881122 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeL_fmeasure": 0.03698020550413456, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018951966460887433 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_precision": 0.05260647328921363, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.00295071182859548 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_recall": 0.04093574793020607, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022066554152275607 + }, + { + "task_name": "piqa", + "prompt_name": "no prompt needed", + "rougeLsum_fmeasure": 0.03752446063060584, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019205547729290852 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_0.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_0.json new file mode 100644 index 0000000000000000000000000000000000000000..049b2104ae09d3f4739379848c605581a3802cf0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.49510337323177367, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.01166526473007815 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.49510337323177367, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01166526473007815 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_1.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2965ea83214c8ab8023fdef0fc74fa82fb482372 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.5048966267682263, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.01166526473007814 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.5048966267682263, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01166526473007814 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_2.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_2.json new file mode 100644 index 0000000000000000000000000000000000000000..da01ed8a634d47af1250c4f20d5e19e87f62ffc1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.5027203482045702, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011665651503000718 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.5027203482045702, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011665651503000718 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_3.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1235edbbd81ac3254962d9c7e61f2461c4b84ec6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.5108813928182807, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011663061261117758 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.5108813928182807, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011663061261117758 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_4.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_4.json new file mode 100644 index 0000000000000000000000000000000000000000..706aa57ec207905236fe17a332d1bf347d64cc69 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.5087051142546246, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011664055982032842 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.5087051142546246, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011664055982032842 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_5.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c1add6e1ea2812fbd072eced7624a118a19112a4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_pick_correct_choice_index_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc": 0.5032644178454843, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011665575530760367 + }, + { + "task_name": "piqa", + "prompt_name": "pick_correct_choice_index", + "acc_norm": 0.5032644178454843, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011665575530760367 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..a5639379622a0a7301f0a00759edecc4546c4890 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.5136017410228509, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.01166150683982377 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.5179542981501633, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011658300623287153 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7cd493528bbeaf2fc02bb99e1ab0ae3e9775ea90 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.5206746463547334, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011655846995729705 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.5255712731229597, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011650557844573577 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8458a246d0e47fe8bb9d798599a02e2360d9a50d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.5201305767138193, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011656365410780373 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.5206746463547334, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011655846995729705 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..679ddd73c39fedee36483beb707c29877cea4a86 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.5223068552774756, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011654208652596471 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.5266594124047879, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01164922999434739 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..91efe16fce9b8231e3f2a32b935f8821d1ddc2cf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.529923830250272, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011644913435420155 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.529379760609358, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011645667565050864 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..f1701753691e29763579ea77eee69cd6e8939cee --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_piqa_what_is_the_correct_ending_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc": 0.5272034820457019, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.011648545262429024 + }, + { + "task_name": "piqa", + "prompt_name": "what_is_the_correct_ending", + "acc_norm": 0.5250272034820457, + "dataset_path": "piqa", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.011651200967623707 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json new file mode 100644 index 0000000000000000000000000000000000000000..374a8ff45b66fec5c9dddde6bc2e7cbc174ce90e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.397, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015480007449307996 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.377, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01533317012577986 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json new file mode 100644 index 0000000000000000000000000000000000000000..845227c43b2183d2021f15b3cf3b798c60d2d579 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.415, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015589035185604628 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.398, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015486634102858922 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8240cdc596af6f48a05ccbbb4349c978c88cf62c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.405, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015531136990453049 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.397, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015480007449307994 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json new file mode 100644 index 0000000000000000000000000000000000000000..25787553291fa7f5bfe4fa98576217fc3d5b4ff3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.396, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015473313265859406 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.392, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015445859463771297 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json new file mode 100644 index 0000000000000000000000000000000000000000..1591013ae8c2e6ab885185a37ed327b6fa16ef46 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.383, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015380102325652721 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.392, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015445859463771295 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9d060b08b2026118f46ece71d9c3fd0a0ec6eb7f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question-(Closed-Book)_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc": 0.388, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.01541731797991108 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question (Closed Book)", + "acc_norm": 0.379, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01534909100222535 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_0.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_0.json new file mode 100644 index 0000000000000000000000000000000000000000..77978e533d7db68dad15f38622493efa60bbf053 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.703, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014456832294801103 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.6, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015499685165842596 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_1.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_1.json new file mode 100644 index 0000000000000000000000000000000000000000..85d29e96621c0706349f0526323855254c28b3a7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.723, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014158794845306265 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.666, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014922019523732965 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_2.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_2.json new file mode 100644 index 0000000000000000000000000000000000000000..319c1a8b8ee09e9acb9416a415e5b7c7da6a9d7e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.721, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014190150117612037 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.686, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014683991951087974 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_3.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8dbc1796aa718120e1d3ef711969b6356c69c652 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.719, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014221154708434944 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.692, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014606483127342758 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_4.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_4.json new file mode 100644 index 0000000000000000000000000000000000000000..531952caf11b04fe6a100f42bde34c2e8388d291 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.723, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014158794845306265 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.701, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014484778521220465 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_5.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_5.json new file mode 100644 index 0000000000000000000000000000000000000000..c525632f98119f5bd3890fec682636a6594d823e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Direct-Question_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc": 0.718, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014236526215291354 + }, + { + "task_name": "sciq", + "prompt_name": "Direct Question", + "acc_norm": 0.703, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014456832294801103 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json new file mode 100644 index 0000000000000000000000000000000000000000..3a192c503abef85567e5a27add61bce0fd512fcf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.373, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015300493622922814 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.34, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014987482264363937 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json new file mode 100644 index 0000000000000000000000000000000000000000..fe40d2f79475a821dbd8889a5e5a24e60a611221 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.376, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015325105508898129 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.391, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015438826294681783 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e38415d9c69060745f2695245fbc48155fbca663 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.36, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015186527932040117 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.351, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015100563798316403 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9a0f079fd215b00e07625df5053c4dace97d8817 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.325, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014818724459095524 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.345, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015039986742055237 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json new file mode 100644 index 0000000000000000000000000000000000000000..211fd2612c254806976a2f52e42b871bdf7a4a59 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.341, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014998131348402707 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.343, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015019206922356951 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json new file mode 100644 index 0000000000000000000000000000000000000000..49ca6432312e801dc5fe40c6e4559a5c67ae894b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-(Closed-Book)_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc": 0.332, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.014899597242811473 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice (Closed Book)", + "acc_norm": 0.328, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.014853842487270334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json new file mode 100644 index 0000000000000000000000000000000000000000..9c4cd941963cf7adfe719220a14997a70b6b12af --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.508, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015817274929209004 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.408, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015549205052920673 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4dc17c87c71ecb562a6e2adee31914de6cf570bb --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.411, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015566673418599278 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.423, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01563058909047635 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json new file mode 100644 index 0000000000000000000000000000000000000000..e032b92195575431dc1aa5af0ce4e7670ac03314 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.366, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015240612726405754 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.379, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01534909100222535 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf9a533dbc6bb3fd2b172dd60503016d693381b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.368, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.0152580735615218 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.376, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01532510550889813 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6a66706b63f47c89388a83b9ad7374a8960e84b1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.37, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015275252316519362 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.371, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015283736211823187 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4a6ff74723136d4b96fc5492a80d5472be5dd0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice-Question-First_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc": 0.361, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015195720118175115 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice Question First", + "acc_norm": 0.382, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015372453034968522 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_0.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_0.json new file mode 100644 index 0000000000000000000000000000000000000000..909fa8bc10919f9753bbb5aaca3016a9a61607d3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.486, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015813097547730987 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.414, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015583544104177519 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_1.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b4e6760dc07240df2b4be3e54b2463a0db9b9ad5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.427, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015649789644462224 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.436, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015689173023144067 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_2.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_2.json new file mode 100644 index 0000000000000000000000000000000000000000..da947ca68c294b07d58ffc664fcb8f16500321ee --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.401, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015506109745498325 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.389, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015424555647308493 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_3.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7acab5d608c48f883a43c013056cfab051614a64 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.385, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015395194445410808 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.391, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015438826294681783 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_4.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_4.json new file mode 100644 index 0000000000000000000000000000000000000000..324de2cc944941a3fa99f740942ee6bf965052a8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.376, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015325105508898125 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.378, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.015341165254026644 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_5.json b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_5.json new file mode 100644 index 0000000000000000000000000000000000000000..4aa89b0e132fd91516c393c07e816e613bf1198d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_sciq_Multiple-Choice_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc": 0.355, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_stderr": 0.015139491543780532 + }, + { + "task_name": "sciq", + "prompt_name": "Multiple Choice", + "acc_norm": 0.374, + "dataset_path": "sciq", + "dataset_name": null, + "subset": null, + "acc_norm_stderr": 0.01530876736900637 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e264351c0adbf635e7bb2cba9520b3818a2a9c47 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.5018706574024586, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011562351329083268 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.5093532870122929, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011560409019420367 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f21a804f5ba6d0b4bd236e28fb9b05ae2d10868f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.4922501336183859, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011561043278863545 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.5109567076429716, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011559655791130729 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..3c04362c7f9829869c1c420fda39e40d3439488b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.4794227685729556, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011552636515221858 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.49545697487974344, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011561954965856519 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..336153d681a2e09b67437c163e52538625016908 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.4836985569214324, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011556285484521561 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.5056119722073757, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011561703928784337 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ad8f09622fd98f268620c33ae512e68bde8abcd1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.48476750400855156, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011557065368348291 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.49812934259754144, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562351329083266 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6a25352e653b7cd0592d33adb023823bc77a41cf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Answer-Given-options_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc": 0.47888829502939606, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011552120807053815 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Answer Given options", + "acc_norm": 0.4938535542490647, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011561558589040751 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ca72ab1ef6aa5747825f4e291761cedc38709fd1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.5093532870122929, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011560409019420364 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.5200427578834848, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.01155313897796101 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ef3039ef5931789a8f44b0997942e71b40e815e0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.49706039551042225, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011562232421541946 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.5093532870122929, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011560409019420369 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c41568c044a3f0f342506dd11bb3c3d35b5e5303 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.4863709246392304, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011558135970599896 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.4975948690539818, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562298481438055 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..944a9fa36d386321afe92d6fd25c4003a53aba38 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.4922501336183859, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011561043278863545 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.4997327632282202, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562430600098489 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..41ea8289ab5610e634a699cc962973b1d410bbdd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.4778193479422769, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011551049647290307 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.49706039551042225, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562232421541946 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..6e12782a302c9c5d1d21b09b3a3e92eefedf2ec3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Choose-Story-Ending_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc": 0.4735435595938001, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011546234813777409 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Choose Story Ending", + "acc_norm": 0.49331908070550506, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011561400034509398 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e572768f5eb4cc144da304dceeca60a58c9da09b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_0.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b2d8eb8b45bfd26017221cd75c3392a20818cebe --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_1.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..7f14d1e044f2fc71eb9d12dce12fc109d0a7a42a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_2.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..564abff5880b19907254171cfcb6d501ef47b991 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_3.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ba0b4fab6cf82166663e7bd34de15a733fcc4999 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_4.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..24f0b0102953d9e2d0b52d4278514a6134a6941d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Generate-Ending_5.json @@ -0,0 +1,15 @@ +{ + "results": [], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json new file mode 100644 index 0000000000000000000000000000000000000000..fc413a0e923eb8383d9e4c09677fd11745bfead6 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.504008551576697, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011562060664045736 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.5114911811865313, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011559378273599123 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d3f9c561cdd380ef435102ccf850c937e7cc51b3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.4906467129877071, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011560409019420369 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.4991982896846606, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562417388300193 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json new file mode 100644 index 0000000000000000000000000000000000000000..287216380c6fdca1cb4998aeea1fe689f2a76eea --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.4719401389631213, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011544210396951663 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.4949225013361839, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.01156183605423878 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json new file mode 100644 index 0000000000000000000000000000000000000000..67c9290229c018189e562e21d900a0e5eb52508a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.4820951362907536, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011555016408505476 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.49599144842330306, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562060664045727 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json new file mode 100644 index 0000000000000000000000000000000000000000..e02c0ae43bca96866203fd32142b69482b57a70c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.4820951362907536, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011555016408505476 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.48690539818278994, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011558466383367178 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json new file mode 100644 index 0000000000000000000000000000000000000000..51a6db91808710be59df4f2774a374a4abd5e5b8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Novel-Correct-Ending_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc": 0.4730090860502405, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011545573278697235 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Novel Correct Ending", + "acc_norm": 0.4879743452699091, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011559087533800692 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json new file mode 100644 index 0000000000000000000000000000000000000000..90eee68373c6f2ee653371b0b9e1f574cea146d8 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.5109567076429716, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011559655791130734 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.5232495991448424, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.01154992548392746 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json new file mode 100644 index 0000000000000000000000000000000000000000..4f3a538c4d4a58f54bff0e05a56fe6a8e2f6ce59 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.4917156600748263, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.01156084507652571 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.5098877605558525, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011560171163157391 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9fcd5e6e61c380759a294e40107201429903ee76 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.48743987172634956, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.01155878357073797 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.4917156600748263, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.01156084507652571 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6685d0095de6e3a808f2d49dc7e08b6f384ea962 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.481560662747194, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011554566910658105 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.49812934259754144, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011562351329083266 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json new file mode 100644 index 0000000000000000000000000000000000000000..8462ba055aef0ffb5aadfdd9157332142d4a1748 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.47888829502939606, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.011552120807053817 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.4911811865312667, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011560633656952963 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json new file mode 100644 index 0000000000000000000000000000000000000000..2e9722f33a96b5fd4e29479f0f2b710b63ec9b30 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_story_cloze_2016_Story-Continuation-and-Options_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc": 0.47033671833244256, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_stderr": 0.01154206650976701 + }, + { + "task_name": "story_cloze_2016", + "prompt_name": "Story Continuation and Options", + "acc_norm": 0.48957776590058794, + "dataset_path": "story_cloze", + "dataset_name": "2016", + "subset": null, + "acc_norm_stderr": 0.011559920087347773 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_0.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8164ae5569942b967e8e6cafe8056907a988734d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.5379061371841155, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030009848912529113 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030080573208738064 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_1.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_1.json new file mode 100644 index 0000000000000000000000000000000000000000..13819697d92852e1458dbe6e516de3a2b5a22512 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030080573208738064 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.49097472924187724, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030091559826331334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_2.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b2e208affc8b500020cac3fcd1e2ea3e2537b043 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.5523465703971119, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.02993107036293953 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.555956678700361, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.029907396333795994 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_3.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_3.json new file mode 100644 index 0000000000000000000000000000000000000000..9eac0582f40cc9b78abe4d1c62dab986ca6425f7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.5054151624548736, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976633 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_4.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a0e8e65fc9a69d149fb742e44738b24dbbe2884f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030052303463143706 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.5018050541516246, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976633 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_5.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3927d87ac43d01e991beb367f7e645a5e298c457 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_GPT-3-style_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030080573208738064 + }, + { + "task_name": "superglue_rte", + "prompt_name": "GPT-3 style", + "acc_norm": 0.4729241877256318, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json new file mode 100644 index 0000000000000000000000000000000000000000..183cc53d78fdb00b1d54c38003f9669c9529f10b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030052303463143706 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b7c0f87735312182a405e22eb8de06a94a02158b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030080573208738064 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.48014440433212996, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030072723167317194 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ea97e54c45dd919f5bfa1a93b1f2055b1b531020 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.5018050541516246, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030096267148976633 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.5018050541516246, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976633 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f7300e3848144dd4ca4c97ed78b7fdcb49a7e04f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030096267148976633 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.4729241877256318, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6467952675f9d99201f6fa808e960edd4bc728af --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.51985559566787, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030072723167317184 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json new file mode 100644 index 0000000000000000000000000000000000000000..d1fc0eb1421f94c09855c6ca8300a7f542c3da7b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_MNLI-crowdsource_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "MNLI crowdsource", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976626 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c07f41cc5d71a783077577cb53cfa7eafecc6e8d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.516245487364621, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030080573208738064 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json new file mode 100644 index 0000000000000000000000000000000000000000..951cfecdc362f4f7d3bb683b43948e873d0e9187 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030096267148976633 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976626 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8f84014e0a3c8d24e937b581582743891805fa9a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.5054151624548736, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976626 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json new file mode 100644 index 0000000000000000000000000000000000000000..c64c23462b1c3761cc0e3afdcc5486ae99c6da13 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.48736462093862815, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030086851767188564 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.5090252707581228, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030091559826331334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f3b6e123992db107f7de71dea68b029bf04fb3c7 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.5090252707581228, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030091559826331334 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.51985559566787, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030072723167317184 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9f7ab2b9b4f486c697cec63daad7b623f245eee2 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_does-it-follow-that_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc": 0.48014440433212996, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.0300727231673172 + }, + { + "task_name": "superglue_rte", + "prompt_name": "does it follow that", + "acc_norm": 0.48736462093862815, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030086851767188564 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_0.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_0.json new file mode 100644 index 0000000000000000000000000000000000000000..dc746c9e6cfcc01cc9a0ac2e1c090895a4900cea --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.5342960288808665, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030025579819366422 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_1.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_1.json new file mode 100644 index 0000000000000000000000000000000000000000..cceab28afebed2fd4efba938b7bc32654b89cac0 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.48014440433212996, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030072723167317194 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.48736462093862815, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030086851767188564 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_2.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f1098e69dcdc390bcfe7d8ca55bd6e9378af8f68 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030094698123239966 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_3.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7aa570d1160c016f3daf7d496dae94b106bc46d3 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030096267148976626 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.48014440433212996, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.0300727231673172 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_4.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_4.json new file mode 100644 index 0000000000000000000000000000000000000000..834d677408d63a8c2e6bc92a0de9944d4612bfe5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.47653429602888087, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.03006330041190266 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976633 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_5.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e63cac44b6b974f6527f01c5e86829b6ed91853f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_guaranteed-true_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "guaranteed true", + "acc_norm": 0.5054151624548736, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030094698123239966 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_0.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_0.json new file mode 100644 index 0000000000000000000000000000000000000000..dd8a7b22e633a7a29a62d9120a76668a7d74383c --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.5306859205776173, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.03003973059219781 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.5270758122743683, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030052303463143706 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_1.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_1.json new file mode 100644 index 0000000000000000000000000000000000000000..a9fb51df482512084169e7a4a7d0c4738dd9c467 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.4729241877256318, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030052303463143706 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.48014440433212996, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.0300727231673172 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_2.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_2.json new file mode 100644 index 0000000000000000000000000000000000000000..049f52db685587ffddaf97fd8c769edbb40bcce1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.49458483754512633, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030094698123239966 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.4981949458483754, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030096267148976626 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_3.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2458eb80b847fa17e805ff0feadfc31567d6458e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030080573208738064 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.48736462093862815, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030086851767188564 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_4.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6cd6e950369cb88cdff89a0101ffbc812153265a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.4729241877256318, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030052303463143706 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.48375451263537905, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030080573208738064 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_5.json b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7f172029216b36b0bee20f36cd73a565a16bfecf --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_superglue_rte_should-assume_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc": 0.48736462093862815, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_stderr": 0.030086851767188564 + }, + { + "task_name": "superglue_rte", + "prompt_name": "should assume", + "acc_norm": 0.5090252707581228, + "dataset_path": "super_glue", + "dataset_name": "rte", + "subset": null, + "acc_norm_stderr": 0.030091559826331334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_0.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_0.json new file mode 100644 index 0000000000000000000000000000000000000000..10e90849a87a71fde0ea8f332e9333d3bd089bc4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.505130228887135, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014051745961790513 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.494869771112865, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051745961790523 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_1.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5f07ad4cd915d698b5002c5e3efbb4926012f3a1 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.5059194948697711, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.01405150083848581 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.4972375690607735, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052271211616448 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_2.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_2.json new file mode 100644 index 0000000000000000000000000000000000000000..955c936ebbc73e3b0d8c37ba3aa879164df1b119 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.4980268350434096, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052376259225636 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.4964483030781373, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052131146915867 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_3.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_3.json new file mode 100644 index 0000000000000000000000000000000000000000..bcd7c245e621e6106eb352b47bf61e74ed84d7d4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.5090765588003157, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014050170094497707 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.48855564325177586, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014048804199859322 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_4.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_4.json new file mode 100644 index 0000000000000000000000000000000000000000..888024864e28faf56bca95d342832f7fc18b5330 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052446290529015 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.49013417521704816, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014049749833367592 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_5.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5dc9f254337a255bc5f0606148617101cddc79e5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_Replace_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc": 0.5114443567482242, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014048804199859325 + }, + { + "task_name": "winogrande", + "prompt_name": "Replace", + "acc_norm": 0.4925019731649566, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.01405090552122858 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_0.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f7d0d92462608512c985b2ec5a7d240afc97199b --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.4964483030781373, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.01405213114691586 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.494869771112865, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051745961790516 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_1.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2c218f7075db92ea6239fab447483e338ef76883 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.4925019731649566, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014050905521228577 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.49329123914759276, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051220692330349 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_2.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f068602ff32bb8dcebabfe0c58d23edf0505e53d --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.5027624309392266, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052271211616441 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.4940805051302289, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051500838485807 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_3.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_3.json new file mode 100644 index 0000000000000000000000000000000000000000..46d528e67bee47e7d4938bd21d3c846d06d0c975 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.516179952644041, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.0140451261309786 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052446290529019 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_4.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0bf38e507efb60faa7c3af88dd918d1986130441 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.500394632991318, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052481306049516 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.4996053670086819, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052481306049512 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_5.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b116164903ab38602de7930fa370fec377f5c0f4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_True-or-False_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052446290529019 + }, + { + "task_name": "winogrande", + "prompt_name": "True or False", + "acc_norm": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052446290529012 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json new file mode 100644 index 0000000000000000000000000000000000000000..4f5620af020ee814d36078f13ea94a20e1133381 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.5067087608524072, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014051220692330349 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.49329123914759276, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051220692330349 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json new file mode 100644 index 0000000000000000000000000000000000000000..30b997d61297849853e1e13f68a488260a519623 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.5169692186266772, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014044390401612976 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052446290529015 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6e59c7775f798df45f347c19405db7df7e621471 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.489344909234412, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.0140492945362904 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.4964483030781373, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052131146915853 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json new file mode 100644 index 0000000000000000000000000000000000000000..6ea0bf184950e55725e87e033aa0f7c78f62c64f --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.500394632991318, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052481306049516 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.4877663772691397, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014048278820405616 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json new file mode 100644 index 0000000000000000000000000000000000000000..47839f9065a742f0acf2dfb0a50ab45cf9faaa6a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.48855564325177586, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014048804199859332 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.49013417521704816, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014049749833367596 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json new file mode 100644 index 0000000000000000000000000000000000000000..96170b004caec9fce72551d38111031626349096 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_does-underscore-refer-to_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc": 0.4964483030781373, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052131146915852 + }, + { + "task_name": "winogrande", + "prompt_name": "does underscore refer to", + "acc_norm": 0.500394632991318, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052481306049516 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_0.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ac1a1b97bb191cb7ff5821675ab2c12cb35cb076 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.5256511444356748, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014033980956108558 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.5138121546961326, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014047122916440419 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_1.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0ee7afc74d6c08a334449026d6b70804d2f91486 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.5146014206787688, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014046492383275832 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.500394632991318, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052481306049516 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_2.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_2.json new file mode 100644 index 0000000000000000000000000000000000000000..39e5cb902497e7cc0a4d66d25a0ca7ddd15d9985 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.5082872928176796, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014050555322824189 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.5027624309392266, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052271211616441 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_3.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3dd92ff6df5d29f0441461a31fb0f011fe0df0db --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.49329123914759276, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014051220692330349 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.4925019731649566, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014050905521228577 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_4.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_4.json new file mode 100644 index 0000000000000000000000000000000000000000..bba15a524086fc26873c277f66b7331a5e42e8d5 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.5035516969218626, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052131146915867 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.4988161010260458, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052446290529015 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_5.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_5.json new file mode 100644 index 0000000000000000000000000000000000000000..83ee1e9ebb749c2c5417801a9a4d37da8f0d5549 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_stand-for_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc": 0.5114443567482242, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014048804199859329 + }, + { + "task_name": "winogrande", + "prompt_name": "stand for", + "acc_norm": 0.516179952644041, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014045126130978601 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_0.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2c833a7aa9e67c16bc58aa487717004a1a6e9b2e --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_0.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.5130228887134964, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.01404771839399767 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.4964483030781373, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052131146915857 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_1.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8e53e9fc4c7f4b6c0a5d55c45cdc0b1c68a191e4 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_1.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.5027624309392266, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052271211616441 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.5019731649565904, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052376259225636 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_2.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_2.json new file mode 100644 index 0000000000000000000000000000000000000000..28c416e3ae8c281ba878cc44cedbd823783332bd --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_2.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052446290529019 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.505130228887135, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014051745961790513 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_3.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_3.json new file mode 100644 index 0000000000000000000000000000000000000000..c37dae4b489ca84d7a35d233f0930d0c7fedd75a --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_3.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.500394632991318, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014052481306049516 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.5011838989739542, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014052446290529015 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_4.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b3c32b36e8a0a7c8716cdd27fee43dd3376041d9 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_4.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.49013417521704816, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014049749833367596 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.4925019731649566, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.014050905521228573 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_5.json b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_5.json new file mode 100644 index 0000000000000000000000000000000000000000..19379115d88aabf036cfa8a288db53584a629f46 --- /dev/null +++ b/146m14b14b/eval/slim.lm1-146m-14b_winogrande_underscore-refer-to_5.json @@ -0,0 +1,34 @@ +{ + "results": [ + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc": 0.48697711128650356, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_stderr": 0.014047718393997663 + }, + { + "task_name": "winogrande", + "prompt_name": "underscore refer to", + "acc_norm": 0.4877663772691397, + "dataset_path": "winogrande", + "dataset_name": "winogrande_xl", + "subset": null, + "acc_norm_stderr": 0.01404827882040562 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json new file mode 100644 index 0000000000000000000000000000000000000000..5e418a12f243350657fbc59e5fc32a6eb23aff01 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2273593088129955, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.023048422160974854}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06534546646292078, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022653967634482346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21043781234205539, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003996208208082882}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08670665367464021, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020039073276409366}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.026820845156803847, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0014207849027619004}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09549460860341967, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026162181403760645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.036699311161718554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011761982937247013}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.062094192289416324, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0021325935928020773}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.20451582032836602, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038946312490941164}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08296705397653721, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018647461030262618}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.062452077921763396, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0021917167381854613}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20334748275991352, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038281910048227016}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08287948300662376, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001877936799895705}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..c4eb5a681259326138a7b1a13d76aaf5eeae3e24 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2334282280458586, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.019052307328308617}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06664206812457203, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022521713907757224}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22011552548283286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00410978487191024}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08900921610644012, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020657069941074835}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02742604267640331, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012236389813676137}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10166270849980795, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026960826428279448}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03844075075586678, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011778262164477038}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06259979613562147, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020549501026142056}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21306622153871427, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003978348602912166}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0844769580786778, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018659122044865008}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06293334235936807, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002091916854143351}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21239997123355958, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003944956006147718}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0845681547310993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001889837918272996}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..a33f7a990d0f53898d0e9dca32cf917a045621cf --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.23704825822381498, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.027670400496745257}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06174874381710095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018845302725776602}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21730574055132956, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00401363972509449}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08585565394877333, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00199569997327382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.025823917902136784, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009656291224127635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09983273352200027, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002662212695695811}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03728553813713636, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00115452384616963}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.058172511770298356, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016848170587776589}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21029648393397446, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003872029478285546}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0816882805637377, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018106503439831681}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.058665437136628244, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017389144610245677}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21049148580432653, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003861866479426883}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08201910289112235, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018386813277759032}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json new file mode 100644 index 0000000000000000000000000000000000000000..8e5a4a2bdb5b3cbc211ef08f83096e7541d8369e --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.22386799285100242, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.025018167341179967}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.06270503422215706, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018889830987665244}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22196309199959086, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003977028632566687}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08717467473249238, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001988496293821803}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02636193213072692, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009948761975072144}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10182548952971911, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026075444055286934}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03760470124518723, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011212849650132507}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.058872164432613124, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016543717782211067}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2150079269966024, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038312866185628972}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08288446430861876, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017917823158372869}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.059266742576785554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001700146105139763}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2146878788305741, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003813273422443642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08313479823551378, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018229722576069702}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json new file mode 100644 index 0000000000000000000000000000000000000000..1bcccd4da93cd0be8f3381180fbb6973d0a9c882 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2172353235676392, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.022164989159983352}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.065980949123901, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002147721445587754}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.226392298167293, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0041042887852570055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.09099601606039612, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022125069667482317}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.027910124577921847, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001217123854556207}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10372904873293698, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027182548458276256}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03929727146315665, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012472270528183318}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06123190050383225, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018493227441710388}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21837596562168848, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00393427728058764}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08582932917108592, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019694454130826677}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.061481184925116956, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018863336098335244}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21760550986389007, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003896708014396261}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08587389536022703, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019923488945407276}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json new file mode 100644 index 0000000000000000000000000000000000000000..c5f704e382edc46daf2bafe3f5bbb5d0a9cc712e --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.11957570551320973, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017122678910133295}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1255131562764684, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018776761004921133}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1065060745573131, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001343010186487139}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.008514681910363194, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005650521915479279}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.009948502518022673, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000628599205824894}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.007682465512529915, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004379081883445011}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.09608447326527465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013633991908894674}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.10091203512154058, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014635688317212099}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08492259997972296, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000992809060772017}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.11463279913059665, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016245587467926714}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.12015283120807896, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017669328759193385}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1019746781540952, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012588460161316543}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5846163931718847, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05687316221546765}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..0259bf3a7b1c414f1ca9cbcb44362b79a1075ee8 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.13646669155030028, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00220310186554878}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14546845977536896, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002185736272211402}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.12041833327196463, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001596604081334374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.01724386477933621, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008686647411341404}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.01912476010235633, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000862139084747869}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01499749394448183, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000625100665952798}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11051635705738314, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001756238864615761}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11966135440554557, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017926753508674628}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09741475108454614, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001214366654297324}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.12908957637877216, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020679024538660115}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.13765696723373635, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020581929100874927}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.11380484706597256, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001488577909873337}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.1784483013564102, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06904095646323807}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..50650b2d059f2c8896d58f22f74292ae4fc51ca6 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.13009014582477282, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027117628648043474}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.12804528790016462, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002344428985321143}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10566275122482133, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001743438719102012}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.01992958977730361, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001100438026092398}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.018863080954515433, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009086120229391634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014741970258181074, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006322524751793364}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.10732457844198273, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0022127410001868135}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.10756202262646183, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001971425139131646}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08739180884548453, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013975216677133634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.12199094138547152, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0025385358079340996}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.12012956265605543, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002193175745267735}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09896289382222027, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016138864108605157}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.0733305687102461, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07881693779240048}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json new file mode 100644 index 0000000000000000000000000000000000000000..5432fc946a4290752093230795ee855f74ef8a77 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.04847824168899397, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021527306994951743}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04290155774449955, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0017685252209751602}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.03692896653093653, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001444978729387553}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00916634461711251, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008793312023838661}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.007207669889606505, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006015119954053042}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.006267683793955542, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005005058667584872}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.04072462596685456, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001826606480591984}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03615887229029135, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014729717928229518}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.03090192793357971, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001194549842708322}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.04542034910903916, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020204702947843995}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.039970284683949656, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0016443164447444867}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03442247041414027, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013400879274452296}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.09489512058813623, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.015019543669611902}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json new file mode 100644 index 0000000000000000000000000000000000000000..ddb3f430a68359bae6cd9c709bf24c3fb6459466 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.009001052783311397, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010831466573619046}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.006686272162881273, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0007630575014863376}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.006073432028332952, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006628007918517945}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0023261262222902496, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005664528532742944}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0013364635095880378, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00031600635668507955}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0012136209926164965, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0002535102063017258}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.007740597546289337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009578721435880855}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.005711288605340614, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0006470161855133605}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.005151805662253334, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000567191022951449}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.008661280593754251, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010585074288308866}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.00629402279957811, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.000708473118264577}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.005764216603488259, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006312612920070436}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7570330415242193e-11, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 9.79306473294127e-11}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json new file mode 100644 index 0000000000000000000000000000000000000000..5f32eccd0c31cf8a2e2586173e0078b43bc9726c --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 6.473555746711545, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11991319165714288}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.40761719747303815, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029713394877934557}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.32622915467474406, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025105159843268037}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.34215561381708015, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021672296322155556}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.15409023153796192, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002113014573746519}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.12084762554962568, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016765309750084614}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12725114846534297, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00164076771900096}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.30083315187334836, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002499830888463013}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.23875130433521696, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019821323260998534}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.25088591074914646, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017699934884865026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.33648017824876714, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0027477983497877084}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.26827573657482673, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022410349466470796}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.28175550985457437, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020169711983159056}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..7f01a5d9f48dd60877b38195b2c04c1d5dfd0e8a --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 7.633962606953475, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11159543603672754}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4202208020040644, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003044005246951365}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.3432310437592059, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025043229759053897}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.356668169483382, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021607608835383096}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.17155302692760083, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021420775665914075}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1381486339251807, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017426260823225216}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.14338933259648326, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016391755775913735}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3143755923201533, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002563069353077956}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.25646642804127656, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020573152371357105}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2659375155003453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017894307181340716}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.349974374493684, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0028149623026241734}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2856269773413544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002309169188218799}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.29672587998555633, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002046175837266347}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..910cac73c895e73312a9473d40cf6c6d01e91d84 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.542989864092585, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1433846377318832}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4336285483775792, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030892429977636836}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.34827761893603293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025314100460238995}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.363850648547613, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021839425460877635}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.18892045173233854, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00228916384509369}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.14857093701220483, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017945593778511408}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.15547063329696245, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001703404947537638}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33571339839716463, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002715659831835022}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.26744895745664315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020800046877521064}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.27970257938801435, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018380493665157282}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.36855797100423715, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029209189793231666}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2947888159986677, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023280241517856827}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.308271697646505, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020717492761956327}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..9b6e6c622c39abccedcd16634c33d33a4ac627e7 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 8.942155482913693, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16097618995677943}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.43622486474795197, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003180231455917317}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.34794080588594495, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025302437862891095}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.36393958053094144, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022056875684752482}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.19328105202965348, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023520595209739854}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1512965958463667, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018449526665716308}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.15830475389068993, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017490360106332174}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3379579590729962, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002768090228521333}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.26863807145078394, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00211887791687035}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.28058628929067725, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001866861403319837}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3722310719989836, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003015809692521337}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.29631987882427524, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023825505361464985}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.30998668589677814, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002129216424796756}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..e641d6ec1e5f5d691c972e3a70d00dd6f058c504 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 9.075675826005247, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1653441614852989}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.43717237782424556, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032131153990395105}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.34970875370492777, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002492021825922423}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3655627727747465, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002179592031705995}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.19475535986053671, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002374332976786843}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.15278033886088896, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018229656355282099}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.15987755776745063, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001746307435561507}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3431844847260117, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002816860200252875}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2738841977693453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021185239782291246}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.28581073763147397, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018731672478296468}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3753378586075155, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0030325254453903456}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2998624179944231, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023395619025335535}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.31334964121857656, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002093593744190019}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json new file mode 100644 index 0000000000000000000000000000000000000000..6e3c27c60c8fd0f6c6f86e547b6f5eb2d6794616 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.12212339174292709, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00227587516471315}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21119172510668496, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038426678932757265}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.14414241409467604, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002375277032964203}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.015557546867072657, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009185461625331802}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03139695878394972, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017910321872932162}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.019599702192861963, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011047188456746039}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09647648601466811, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016587561042236644}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.1698329781218389, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002988542446170774}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11454550827536823, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001745331955977972}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09487372714642431, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017364409448969846}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16552982169201422, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0031000761747263053}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1122073254536097, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018494671901998726}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9731159077387963, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11074536852276136}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..7debc841635057e2ba3ebcdfd2ac7cc76e73160a --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.12823223548348878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0025565036587658725}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2143351214598643, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036128802474860813}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1474750330894204, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002352105574798546}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.017523161513321936, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009922032751105664}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03274318045581887, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017433402857737756}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.02113796673167594, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011185423587926665}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10370089292267248, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001970850041369119}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.17671366292027554, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0028713475720421144}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12005256994367001, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017848482742927902}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10049270031173661, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002030546802009858}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16875347685809536, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002920858053558713}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11549675473891775, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018543831002685392}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.8946101579944589, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11447862558731768}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..c086327ddefeb9b789672b8b33f6cf7a8e231566 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13560916718487, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030925128958130092}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.1989284367405898, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037001652881557775}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.14593815028467524, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0026868363513368075}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.019698420478264804, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012604026842234879}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.030787306962827925, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001718481660739863}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.021594887074556493, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011983258613855398}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10928242080406224, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002413360587361133}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.16472649296075753, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00306995614023579}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11855231312063658, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002059391483708031}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10597385450714689, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002479253447768338}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.15617920045628825, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0030070719552520047}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11374940769267439, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002097072555940818}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9245086188793217, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14099807544420506}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..4712eaab1a616340c5c149149c7d5a65b8975ff8 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.037913585028776065, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023001502602127523}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.04985852283454599, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003059874154622636}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.03937861378098916, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002315856820827257}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0049564518309591745, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005552167801086178}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00808101754829496, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009432130706742604}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005653435053950428, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006134888531248025}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.03066492611348946, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018517687252350952}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0409686857380869, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0025238815188420474}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.031970816594020374, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018597075082346828}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.030346836341775464, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018562425966080272}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.04007296257687896, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00249432492023191}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.03145276305797317, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018465561902809033}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.1945677197809648, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04427282782106186}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..8d873240ec729991040849fa5d7400ef1bce3365 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.003591191401452728, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001020843909570341}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0026658820558568995, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0007715098804589186}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0029312590283442222, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0008258312479225911}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0006103958934147612, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003006367876943161}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0004676141468594298, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002456567053766645}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0005247055544583527, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00026825774796811777}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.00292309163386754, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0008213440292977818}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0021724670251771485, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0006306676973825689}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0023789038881630656, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0006611391677521927}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0030374426916148236, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008522737295493769}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0022504336554593878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0006494473025743668}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0024716209620122146, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006863922396788282}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 4.562392577706544e-45, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 7.886084839478574e-39}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..574f28370eb8703d0dc3d7001be8855263df19c8 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9446873e51911c8b8686705751c9182be1150b9629903cf106db81836c3a8679 +size 4866419 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2951a5f9e4a348ec60caddef45fab419346ee5c3 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13dc073febb60132909277224be8514c55e4647b9d7085910a8d8d8c4ec7f80 +size 5799135 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..db7544d4faf0794ce16a5bbfb8470e73fbec389b --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46df8d8bb0dc1ef85bb8e68b119b3b32f653559d29a61b8cfe1ec993ef5e6a37 +size 6758841 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4b37b452be6ed6b2e307f143110608a3312c6db3 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8618904072081874b2483fbc388d017c5340beadddcf13d0734e4f634b85b9b4 +size 7675980 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..62ee1573deb162871dba0489b649b5b15d11df17 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9ad02336238dedf28d742fce41213d01a84c61e3e5d7ddd176041093822a2d +size 8566260 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1cfdb9a7eae3fd0c7850e35ac5553082b6acb930 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9c12da9e4cf1626f142335d02f8d93dbf57c0b903fba341f40482d2808a5bf +size 13071382 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a8ec0e37129a1654ff5e043ec1341657bbb2b942 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f437dc0ce2cc5c37ec85bf6e57a16fe7d967720f9b4f6709d8cf9fc1f33a3f42 +size 18706107 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5ae7718b9b79666c65f63a8dc916b1bee70551b6 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920459cd4c2dc2c9010666431445b53ede2a371139364e931c07b0cb2e65ff9f +size 24171933 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d775f70ee44dda52eab151dd2cb65a8373221d40 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2ed26bbd59327458fb16e4591d8d1e301984314bca98c00580efa523378120 +size 29408476 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..256b0f91b503b06b7049c40ecd51ca8eee9dca99 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6521cd078e8c7ea3fb6565b437f88e567954ec67433e2204cdcfe08b8118b23c +size 34787862 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6dc37229a39101416a1314eaad73c0548f1a9afb --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ddfbb1fa7ca8a7b8eeac7f67dfbcd776caef7500811e9609592bc8b5fcba548 +size 5031183 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..44cbc9688ff8a759a0673e358837c25cae5f7878 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2155de295f7fd06379fa19fac9f2572fe5fcbca29c700433d7c31fa18d4b75f9 +size 6128025 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..34f09cee54a4191c7c83acc91dcb1d47fa084393 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1944f5e451f8198ec57b9d74d70f2a67ad0f8aac881b7d9275fdce5d2a437a8 +size 7206296 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9be5c8151d6e47925e2ef8f0f2d35315d1508d42 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dda97aae89a4071bf4df35cfd7c287d23b4bd6bf87f6914ae6cf04e063d8f2c +size 8285498 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..21ea1e7fdaeb4958362bdfb665c04c258f31ed88 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701b579b0a353f5537e8b34c1ebd62e781ff91f583293d2d7b9d24733ab9967a +size 9374803 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..145b203cd5c0c200a13fa89f0705b1fbc0f9c05d --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89d6cc89134fda75c681bf6000c03952e671b8419fb233c66980723348facb0 +size 5019383 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..30f750bb2240724bd213e5c5662d5e69a7a45768 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ea27f826bd4cfca4246bc719b10ac0f091e70a53954632d9d59e75345d3df9 +size 7299977 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..384ff61809f67f22490e1cd2d5759ffb608580e6 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40047f241306d1d946dfe03a3f709ff1eb9b38c816924cea217fe8776a8e64e5 +size 9558078 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..41c08c3326ff1d4571f59f43e06a1e749cabd10b --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e0de026464f7f68c0559cd92b3e46741eaceddef8335c60113509bf37e188d +size 11648689 diff --git a/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..72039f06b5991acacef564fe4eaab3be4341cf7c --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ed6d638a5c98b4acfe1ca4f368c675c8183dc4502927976ca9a1c32e18efb8 +size 13897293 diff --git a/146m14b14b/evaluation/generation/merged_1shots.csv b/146m14b14b/evaluation/generation/merged_1shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..edfbf105470f4fa4ca8e247fd7f5b1f9f18468ad --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_1shots.csv @@ -0,0 +1,5 @@ +dataset,prompt,metric,value +e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.12725114846534297 +gem_xsum,article_DOC_summary,rouge2_fmeasure,0.019599702192861963 +web_nlg_en,PALM_prompt,rouge2_fmeasure,0.036699311161718554 +wiki_lingua_en,tldr_en,rouge2_fmeasure,0.007682465512529915 diff --git a/146m14b14b/evaluation/generation/merged_1shots.json b/146m14b14b/evaluation/generation/merged_1shots.json new file mode 100644 index 0000000000000000000000000000000000000000..d8d444ec0f6a3d68a6e04ab369396b1f0dd49bb3 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_1shots.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.2273593088129955, "bleu_stderr": 0.023048422160974854, "rouge1_fmeasure": 0.08670665367464021, "rouge1_fmeasure_stderr": 0.0020039073276409366, "rouge1_precision": 0.06534546646292078, "rouge1_precision_stderr": 0.0022653967634482346, "rouge1_recall": 0.21043781234205539, "rouge1_recall_stderr": 0.003996208208082882, "rouge2_fmeasure": 0.036699311161718554, "rouge2_fmeasure_stderr": 0.0011761982937247013, "rouge2_precision": 0.026820845156803847, "rouge2_precision_stderr": 0.0014207849027619004, "rouge2_recall": 0.09549460860341967, "rouge2_recall_stderr": 0.0026162181403760645, "rougeL_fmeasure": 0.08296705397653721, "rougeL_fmeasure_stderr": 0.0018647461030262618, "rougeL_precision": 0.062094192289416324, "rougeL_precision_stderr": 0.0021325935928020773, "rougeL_recall": 0.20451582032836602, "rougeL_recall_stderr": 0.0038946312490941164, "rougeLsum_fmeasure": 0.08287948300662376, "rougeLsum_fmeasure_stderr": 0.001877936799895705, "rougeLsum_precision": 0.062452077921763396, "rougeLsum_precision_stderr": 0.0021917167381854613, "rougeLsum_recall": 0.20334748275991352, "rougeLsum_recall_stderr": 0.0038281910048227016}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 0.5846163931718847, "bleu_stderr": 0.05687316221546765, "rouge1_fmeasure": 0.1065060745573131, "rouge1_fmeasure_stderr": 0.001343010186487139, "rouge1_precision": 0.11957570551320973, "rouge1_precision_stderr": 0.0017122678910133295, "rouge1_recall": 0.1255131562764684, "rouge1_recall_stderr": 0.0018776761004921133, "rouge2_fmeasure": 0.007682465512529915, "rouge2_fmeasure_stderr": 0.0004379081883445011, "rouge2_precision": 0.008514681910363194, "rouge2_precision_stderr": 0.0005650521915479279, "rouge2_recall": 0.009948502518022673, "rouge2_recall_stderr": 0.000628599205824894, "rougeL_fmeasure": 0.08492259997972296, "rougeL_fmeasure_stderr": 0.000992809060772017, "rougeL_precision": 0.09608447326527465, "rougeL_precision_stderr": 0.0013633991908894674, "rougeL_recall": 0.10091203512154058, "rougeL_recall_stderr": 0.0014635688317212099, "rougeLsum_fmeasure": 0.1019746781540952, "rougeLsum_fmeasure_stderr": 0.0012588460161316543, "rougeLsum_precision": 0.11463279913059665, "rougeLsum_precision_stderr": 0.0016245587467926714, "rougeLsum_recall": 0.12015283120807896, "rougeLsum_recall_stderr": 0.0017669328759193385}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 6.473555746711545, "bleu_stderr": 0.11991319165714288, "rouge1_fmeasure": 0.34215561381708015, "rouge1_fmeasure_stderr": 0.0021672296322155556, "rouge1_precision": 0.40761719747303815, "rouge1_precision_stderr": 0.0029713394877934557, "rouge1_recall": 0.32622915467474406, "rouge1_recall_stderr": 0.0025105159843268037, "rouge2_fmeasure": 0.12725114846534297, "rouge2_fmeasure_stderr": 0.00164076771900096, "rouge2_precision": 0.15409023153796192, "rouge2_precision_stderr": 0.002113014573746519, "rouge2_recall": 0.12084762554962568, "rouge2_recall_stderr": 0.0016765309750084614, "rougeL_fmeasure": 0.25088591074914646, "rougeL_fmeasure_stderr": 0.0017699934884865026, "rougeL_precision": 0.30083315187334836, "rougeL_precision_stderr": 0.002499830888463013, "rougeL_recall": 0.23875130433521696, "rougeL_recall_stderr": 0.0019821323260998534, "rougeLsum_fmeasure": 0.28175550985457437, "rougeLsum_fmeasure_stderr": 0.0020169711983159056, "rougeLsum_precision": 0.33648017824876714, "rougeLsum_precision_stderr": 0.0027477983497877084, "rougeLsum_recall": 0.26827573657482673, "rougeLsum_recall_stderr": 0.0022410349466470796}}, "gem_xsum": {"article_DOC_summary": {"bleu": 0.9731159077387963, "bleu_stderr": 0.11074536852276136, "rouge1_fmeasure": 0.14414241409467604, "rouge1_fmeasure_stderr": 0.002375277032964203, "rouge1_precision": 0.12212339174292709, "rouge1_precision_stderr": 0.00227587516471315, "rouge1_recall": 0.21119172510668496, "rouge1_recall_stderr": 0.0038426678932757265, "rouge2_fmeasure": 0.019599702192861963, "rouge2_fmeasure_stderr": 0.0011047188456746039, "rouge2_precision": 0.015557546867072657, "rouge2_precision_stderr": 0.0009185461625331802, "rouge2_recall": 0.03139695878394972, "rouge2_recall_stderr": 0.0017910321872932162, "rougeL_fmeasure": 0.11454550827536823, "rougeL_fmeasure_stderr": 0.001745331955977972, "rougeL_precision": 0.09647648601466811, "rougeL_precision_stderr": 0.0016587561042236644, "rougeL_recall": 0.1698329781218389, "rougeL_recall_stderr": 0.002988542446170774, "rougeLsum_fmeasure": 0.1122073254536097, "rougeLsum_fmeasure_stderr": 0.0018494671901998726, "rougeLsum_precision": 0.09487372714642431, "rougeLsum_precision_stderr": 0.0017364409448969846, "rougeLsum_recall": 0.16552982169201422, "rougeLsum_recall_stderr": 0.0031000761747263053}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/merged_2shots.csv b/146m14b14b/evaluation/generation/merged_2shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..aaa67dd675865e769341afe444f46e951c75eaa7 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_2shots.csv @@ -0,0 +1,5 @@ +dataset,prompt,metric,value +e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.14338933259648326 +gem_xsum,article_DOC_summary,rouge2_fmeasure,0.02113796673167594 +web_nlg_en,PALM_prompt,rouge2_fmeasure,0.03844075075586678 +wiki_lingua_en,tldr_en,rouge2_fmeasure,0.01499749394448183 diff --git a/146m14b14b/evaluation/generation/merged_2shots.json b/146m14b14b/evaluation/generation/merged_2shots.json new file mode 100644 index 0000000000000000000000000000000000000000..299abcc5a40906a5673818e858e6e7f8a5e26cae --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_2shots.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.2334282280458586, "bleu_stderr": 0.019052307328308617, "rouge1_fmeasure": 0.08900921610644012, "rouge1_fmeasure_stderr": 0.0020657069941074835, "rouge1_precision": 0.06664206812457203, "rouge1_precision_stderr": 0.0022521713907757224, "rouge1_recall": 0.22011552548283286, "rouge1_recall_stderr": 0.00410978487191024, "rouge2_fmeasure": 0.03844075075586678, "rouge2_fmeasure_stderr": 0.0011778262164477038, "rouge2_precision": 0.02742604267640331, "rouge2_precision_stderr": 0.0012236389813676137, "rouge2_recall": 0.10166270849980795, "rouge2_recall_stderr": 0.0026960826428279448, "rougeL_fmeasure": 0.0844769580786778, "rougeL_fmeasure_stderr": 0.0018659122044865008, "rougeL_precision": 0.06259979613562147, "rougeL_precision_stderr": 0.0020549501026142056, "rougeL_recall": 0.21306622153871427, "rougeL_recall_stderr": 0.003978348602912166, "rougeLsum_fmeasure": 0.0845681547310993, "rougeLsum_fmeasure_stderr": 0.001889837918272996, "rougeLsum_precision": 0.06293334235936807, "rougeLsum_precision_stderr": 0.002091916854143351, "rougeLsum_recall": 0.21239997123355958, "rougeLsum_recall_stderr": 0.003944956006147718}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.1784483013564102, "bleu_stderr": 0.06904095646323807, "rouge1_fmeasure": 0.12041833327196463, "rouge1_fmeasure_stderr": 0.001596604081334374, "rouge1_precision": 0.13646669155030028, "rouge1_precision_stderr": 0.00220310186554878, "rouge1_recall": 0.14546845977536896, "rouge1_recall_stderr": 0.002185736272211402, "rouge2_fmeasure": 0.01499749394448183, "rouge2_fmeasure_stderr": 0.000625100665952798, "rouge2_precision": 0.01724386477933621, "rouge2_precision_stderr": 0.0008686647411341404, "rouge2_recall": 0.01912476010235633, "rouge2_recall_stderr": 0.000862139084747869, "rougeL_fmeasure": 0.09741475108454614, "rougeL_fmeasure_stderr": 0.001214366654297324, "rougeL_precision": 0.11051635705738314, "rougeL_precision_stderr": 0.001756238864615761, "rougeL_recall": 0.11966135440554557, "rougeL_recall_stderr": 0.0017926753508674628, "rougeLsum_fmeasure": 0.11380484706597256, "rougeLsum_fmeasure_stderr": 0.001488577909873337, "rougeLsum_precision": 0.12908957637877216, "rougeLsum_precision_stderr": 0.0020679024538660115, "rougeLsum_recall": 0.13765696723373635, "rougeLsum_recall_stderr": 0.0020581929100874927}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 7.633962606953475, "bleu_stderr": 0.11159543603672754, "rouge1_fmeasure": 0.356668169483382, "rouge1_fmeasure_stderr": 0.0021607608835383096, "rouge1_precision": 0.4202208020040644, "rouge1_precision_stderr": 0.003044005246951365, "rouge1_recall": 0.3432310437592059, "rouge1_recall_stderr": 0.0025043229759053897, "rouge2_fmeasure": 0.14338933259648326, "rouge2_fmeasure_stderr": 0.0016391755775913735, "rouge2_precision": 0.17155302692760083, "rouge2_precision_stderr": 0.0021420775665914075, "rouge2_recall": 0.1381486339251807, "rouge2_recall_stderr": 0.0017426260823225216, "rougeL_fmeasure": 0.2659375155003453, "rougeL_fmeasure_stderr": 0.0017894307181340716, "rougeL_precision": 0.3143755923201533, "rougeL_precision_stderr": 0.002563069353077956, "rougeL_recall": 0.25646642804127656, "rougeL_recall_stderr": 0.0020573152371357105, "rougeLsum_fmeasure": 0.29672587998555633, "rougeLsum_fmeasure_stderr": 0.002046175837266347, "rougeLsum_precision": 0.349974374493684, "rougeLsum_precision_stderr": 0.0028149623026241734, "rougeLsum_recall": 0.2856269773413544, "rougeLsum_recall_stderr": 0.002309169188218799}}, "gem_xsum": {"article_DOC_summary": {"bleu": 0.8946101579944589, "bleu_stderr": 0.11447862558731768, "rouge1_fmeasure": 0.1474750330894204, "rouge1_fmeasure_stderr": 0.002352105574798546, "rouge1_precision": 0.12823223548348878, "rouge1_precision_stderr": 0.0025565036587658725, "rouge1_recall": 0.2143351214598643, "rouge1_recall_stderr": 0.0036128802474860813, "rouge2_fmeasure": 0.02113796673167594, "rouge2_fmeasure_stderr": 0.0011185423587926665, "rouge2_precision": 0.017523161513321936, "rouge2_precision_stderr": 0.0009922032751105664, "rouge2_recall": 0.03274318045581887, "rouge2_recall_stderr": 0.0017433402857737756, "rougeL_fmeasure": 0.12005256994367001, "rougeL_fmeasure_stderr": 0.0017848482742927902, "rougeL_precision": 0.10370089292267248, "rougeL_precision_stderr": 0.001970850041369119, "rougeL_recall": 0.17671366292027554, "rougeL_recall_stderr": 0.0028713475720421144, "rougeLsum_fmeasure": 0.11549675473891775, "rougeLsum_fmeasure_stderr": 0.0018543831002685392, "rougeLsum_precision": 0.10049270031173661, "rougeLsum_precision_stderr": 0.002030546802009858, "rougeLsum_recall": 0.16875347685809536, "rougeLsum_recall_stderr": 0.002920858053558713}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/merged_3shots.csv b/146m14b14b/evaluation/generation/merged_3shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..44dec9c91551aec759920d69bb12912f4ee7fb96 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_3shots.csv @@ -0,0 +1,5 @@ +dataset,prompt,metric,value +e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.15547063329696245 +gem_xsum,article_DOC_summary,rouge2_fmeasure,0.021594887074556493 +web_nlg_en,PALM_prompt,rouge2_fmeasure,0.03728553813713636 +wiki_lingua_en,tldr_en,rouge2_fmeasure,0.014741970258181074 diff --git a/146m14b14b/evaluation/generation/merged_3shots.json b/146m14b14b/evaluation/generation/merged_3shots.json new file mode 100644 index 0000000000000000000000000000000000000000..f1b7afe7792a3b2f2c8d85a58f53568c9604733e --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_3shots.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.23704825822381498, "bleu_stderr": 0.027670400496745257, "rouge1_fmeasure": 0.08585565394877333, "rouge1_fmeasure_stderr": 0.00199569997327382, "rouge1_precision": 0.06174874381710095, "rouge1_precision_stderr": 0.0018845302725776602, "rouge1_recall": 0.21730574055132956, "rouge1_recall_stderr": 0.00401363972509449, "rouge2_fmeasure": 0.03728553813713636, "rouge2_fmeasure_stderr": 0.00115452384616963, "rouge2_precision": 0.025823917902136784, "rouge2_precision_stderr": 0.0009656291224127635, "rouge2_recall": 0.09983273352200027, "rouge2_recall_stderr": 0.002662212695695811, "rougeL_fmeasure": 0.0816882805637377, "rougeL_fmeasure_stderr": 0.0018106503439831681, "rougeL_precision": 0.058172511770298356, "rougeL_precision_stderr": 0.0016848170587776589, "rougeL_recall": 0.21029648393397446, "rougeL_recall_stderr": 0.003872029478285546, "rougeLsum_fmeasure": 0.08201910289112235, "rougeLsum_fmeasure_stderr": 0.0018386813277759032, "rougeLsum_precision": 0.058665437136628244, "rougeLsum_precision_stderr": 0.0017389144610245677, "rougeLsum_recall": 0.21049148580432653, "rougeLsum_recall_stderr": 0.003861866479426883}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.0733305687102461, "bleu_stderr": 0.07881693779240048, "rouge1_fmeasure": 0.10566275122482133, "rouge1_fmeasure_stderr": 0.001743438719102012, "rouge1_precision": 0.13009014582477282, "rouge1_precision_stderr": 0.0027117628648043474, "rouge1_recall": 0.12804528790016462, "rouge1_recall_stderr": 0.002344428985321143, "rouge2_fmeasure": 0.014741970258181074, "rouge2_fmeasure_stderr": 0.0006322524751793364, "rouge2_precision": 0.01992958977730361, "rouge2_precision_stderr": 0.001100438026092398, "rouge2_recall": 0.018863080954515433, "rouge2_recall_stderr": 0.0009086120229391634, "rougeL_fmeasure": 0.08739180884548453, "rougeL_fmeasure_stderr": 0.0013975216677133634, "rougeL_precision": 0.10732457844198273, "rougeL_precision_stderr": 0.0022127410001868135, "rougeL_recall": 0.10756202262646183, "rougeL_recall_stderr": 0.001971425139131646, "rougeLsum_fmeasure": 0.09896289382222027, "rougeLsum_fmeasure_stderr": 0.0016138864108605157, "rougeLsum_precision": 0.12199094138547152, "rougeLsum_precision_stderr": 0.0025385358079340996, "rougeLsum_recall": 0.12012956265605543, "rougeLsum_recall_stderr": 0.002193175745267735}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 8.542989864092585, "bleu_stderr": 0.1433846377318832, "rouge1_fmeasure": 0.363850648547613, "rouge1_fmeasure_stderr": 0.0021839425460877635, "rouge1_precision": 0.4336285483775792, "rouge1_precision_stderr": 0.0030892429977636836, "rouge1_recall": 0.34827761893603293, "rouge1_recall_stderr": 0.0025314100460238995, "rouge2_fmeasure": 0.15547063329696245, "rouge2_fmeasure_stderr": 0.001703404947537638, "rouge2_precision": 0.18892045173233854, "rouge2_precision_stderr": 0.00228916384509369, "rouge2_recall": 0.14857093701220483, "rouge2_recall_stderr": 0.0017945593778511408, "rougeL_fmeasure": 0.27970257938801435, "rougeL_fmeasure_stderr": 0.0018380493665157282, "rougeL_precision": 0.33571339839716463, "rougeL_precision_stderr": 0.002715659831835022, "rougeL_recall": 0.26744895745664315, "rougeL_recall_stderr": 0.0020800046877521064, "rougeLsum_fmeasure": 0.308271697646505, "rougeLsum_fmeasure_stderr": 0.0020717492761956327, "rougeLsum_precision": 0.36855797100423715, "rougeLsum_precision_stderr": 0.0029209189793231666, "rougeLsum_recall": 0.2947888159986677, "rougeLsum_recall_stderr": 0.0023280241517856827}}, "gem_xsum": {"article_DOC_summary": {"bleu": 0.9245086188793217, "bleu_stderr": 0.14099807544420506, "rouge1_fmeasure": 0.14593815028467524, "rouge1_fmeasure_stderr": 0.0026868363513368075, "rouge1_precision": 0.13560916718487, "rouge1_precision_stderr": 0.0030925128958130092, "rouge1_recall": 0.1989284367405898, "rouge1_recall_stderr": 0.0037001652881557775, "rouge2_fmeasure": 0.021594887074556493, "rouge2_fmeasure_stderr": 0.0011983258613855398, "rouge2_precision": 0.019698420478264804, "rouge2_precision_stderr": 0.0012604026842234879, "rouge2_recall": 0.030787306962827925, "rouge2_recall_stderr": 0.001718481660739863, "rougeL_fmeasure": 0.11855231312063658, "rougeL_fmeasure_stderr": 0.002059391483708031, "rougeL_precision": 0.10928242080406224, "rougeL_precision_stderr": 0.002413360587361133, "rougeL_recall": 0.16472649296075753, "rougeL_recall_stderr": 0.00306995614023579, "rougeLsum_fmeasure": 0.11374940769267439, "rougeLsum_fmeasure_stderr": 0.002097072555940818, "rougeLsum_precision": 0.10597385450714689, "rougeLsum_precision_stderr": 0.002479253447768338, "rougeLsum_recall": 0.15617920045628825, "rougeLsum_recall_stderr": 0.0030070719552520047}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/merged_4shots.csv b/146m14b14b/evaluation/generation/merged_4shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..9d549861c5e4acc1b8f90f1e6194df6a923cb1c4 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_4shots.csv @@ -0,0 +1,5 @@ +dataset,prompt,metric,value +e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.15830475389068993 +gem_xsum,article_DOC_summary,rouge2_fmeasure,0.005653435053950428 +web_nlg_en,PALM_prompt,rouge2_fmeasure,0.03760470124518723 +wiki_lingua_en,tldr_en,rouge2_fmeasure,0.006267683793955542 diff --git a/146m14b14b/evaluation/generation/merged_4shots.json b/146m14b14b/evaluation/generation/merged_4shots.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2f5524ec2cddd5d69c918682fb95481c2a2750 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_4shots.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.22386799285100242, "bleu_stderr": 0.025018167341179967, "rouge1_fmeasure": 0.08717467473249238, "rouge1_fmeasure_stderr": 0.001988496293821803, "rouge1_precision": 0.06270503422215706, "rouge1_precision_stderr": 0.0018889830987665244, "rouge1_recall": 0.22196309199959086, "rouge1_recall_stderr": 0.003977028632566687, "rouge2_fmeasure": 0.03760470124518723, "rouge2_fmeasure_stderr": 0.0011212849650132507, "rouge2_precision": 0.02636193213072692, "rouge2_precision_stderr": 0.0009948761975072144, "rouge2_recall": 0.10182548952971911, "rouge2_recall_stderr": 0.0026075444055286934, "rougeL_fmeasure": 0.08288446430861876, "rougeL_fmeasure_stderr": 0.0017917823158372869, "rougeL_precision": 0.058872164432613124, "rougeL_precision_stderr": 0.0016543717782211067, "rougeL_recall": 0.2150079269966024, "rougeL_recall_stderr": 0.0038312866185628972, "rougeLsum_fmeasure": 0.08313479823551378, "rougeLsum_fmeasure_stderr": 0.0018229722576069702, "rougeLsum_precision": 0.059266742576785554, "rougeLsum_precision_stderr": 0.001700146105139763, "rougeLsum_recall": 0.2146878788305741, "rougeLsum_recall_stderr": 0.003813273422443642}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 0.09489512058813623, "bleu_stderr": 0.015019543669611902, "rouge1_fmeasure": 0.03692896653093653, "rouge1_fmeasure_stderr": 0.001444978729387553, "rouge1_precision": 0.04847824168899397, "rouge1_precision_stderr": 0.0021527306994951743, "rouge1_recall": 0.04290155774449955, "rouge1_recall_stderr": 0.0017685252209751602, "rouge2_fmeasure": 0.006267683793955542, "rouge2_fmeasure_stderr": 0.0005005058667584872, "rouge2_precision": 0.00916634461711251, "rouge2_precision_stderr": 0.0008793312023838661, "rouge2_recall": 0.007207669889606505, "rouge2_recall_stderr": 0.0006015119954053042, "rougeL_fmeasure": 0.03090192793357971, "rougeL_fmeasure_stderr": 0.001194549842708322, "rougeL_precision": 0.04072462596685456, "rougeL_precision_stderr": 0.001826606480591984, "rougeL_recall": 0.03615887229029135, "rougeL_recall_stderr": 0.0014729717928229518, "rougeLsum_fmeasure": 0.03442247041414027, "rougeLsum_fmeasure_stderr": 0.0013400879274452296, "rougeLsum_precision": 0.04542034910903916, "rougeLsum_precision_stderr": 0.0020204702947843995, "rougeLsum_recall": 0.039970284683949656, "rougeLsum_recall_stderr": 0.0016443164447444867}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 8.942155482913693, "bleu_stderr": 0.16097618995677943, "rouge1_fmeasure": 0.36393958053094144, "rouge1_fmeasure_stderr": 0.0022056875684752482, "rouge1_precision": 0.43622486474795197, "rouge1_precision_stderr": 0.003180231455917317, "rouge1_recall": 0.34794080588594495, "rouge1_recall_stderr": 0.0025302437862891095, "rouge2_fmeasure": 0.15830475389068993, "rouge2_fmeasure_stderr": 0.0017490360106332174, "rouge2_precision": 0.19328105202965348, "rouge2_precision_stderr": 0.0023520595209739854, "rouge2_recall": 0.1512965958463667, "rouge2_recall_stderr": 0.0018449526665716308, "rougeL_fmeasure": 0.28058628929067725, "rougeL_fmeasure_stderr": 0.001866861403319837, "rougeL_precision": 0.3379579590729962, "rougeL_precision_stderr": 0.002768090228521333, "rougeL_recall": 0.26863807145078394, "rougeL_recall_stderr": 0.00211887791687035, "rougeLsum_fmeasure": 0.30998668589677814, "rougeLsum_fmeasure_stderr": 0.002129216424796756, "rougeLsum_precision": 0.3722310719989836, "rougeLsum_precision_stderr": 0.003015809692521337, "rougeLsum_recall": 0.29631987882427524, "rougeLsum_recall_stderr": 0.0023825505361464985}}, "gem_xsum": {"article_DOC_summary": {"bleu": 0.1945677197809648, "bleu_stderr": 0.04427282782106186, "rouge1_fmeasure": 0.03937861378098916, "rouge1_fmeasure_stderr": 0.002315856820827257, "rouge1_precision": 0.037913585028776065, "rouge1_precision_stderr": 0.0023001502602127523, "rouge1_recall": 0.04985852283454599, "rouge1_recall_stderr": 0.003059874154622636, "rouge2_fmeasure": 0.005653435053950428, "rouge2_fmeasure_stderr": 0.0006134888531248025, "rouge2_precision": 0.0049564518309591745, "rouge2_precision_stderr": 0.0005552167801086178, "rouge2_recall": 0.00808101754829496, "rouge2_recall_stderr": 0.0009432130706742604, "rougeL_fmeasure": 0.031970816594020374, "rougeL_fmeasure_stderr": 0.0018597075082346828, "rougeL_precision": 0.03066492611348946, "rougeL_precision_stderr": 0.0018517687252350952, "rougeL_recall": 0.0409686857380869, "rougeL_recall_stderr": 0.0025238815188420474, "rougeLsum_fmeasure": 0.03145276305797317, "rougeLsum_fmeasure_stderr": 0.0018465561902809033, "rougeLsum_precision": 0.030346836341775464, "rougeLsum_precision_stderr": 0.0018562425966080272, "rougeLsum_recall": 0.04007296257687896, "rougeLsum_recall_stderr": 0.00249432492023191}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/merged_5shots.csv b/146m14b14b/evaluation/generation/merged_5shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..98bec8fe9102ce374fafdfaf72f6e1e9aba75478 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_5shots.csv @@ -0,0 +1,5 @@ +dataset,prompt,metric,value +e2e_nlg_cleaned,generate_text_restaurant,rouge2_fmeasure,0.15987755776745063 +gem_xsum,article_DOC_summary,rouge2_fmeasure,0.0005247055544583527 +web_nlg_en,PALM_prompt,rouge2_fmeasure,0.03929727146315665 +wiki_lingua_en,tldr_en,rouge2_fmeasure,0.0012136209926164965 diff --git a/146m14b14b/evaluation/generation/merged_5shots.json b/146m14b14b/evaluation/generation/merged_5shots.json new file mode 100644 index 0000000000000000000000000000000000000000..d55ab3e9da0c51636d780abe64896576b876ec34 --- /dev/null +++ b/146m14b14b/evaluation/generation/merged_5shots.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.2172353235676392, "bleu_stderr": 0.022164989159983352, "rouge1_fmeasure": 0.09099601606039612, "rouge1_fmeasure_stderr": 0.0022125069667482317, "rouge1_precision": 0.065980949123901, "rouge1_precision_stderr": 0.002147721445587754, "rouge1_recall": 0.226392298167293, "rouge1_recall_stderr": 0.0041042887852570055, "rouge2_fmeasure": 0.03929727146315665, "rouge2_fmeasure_stderr": 0.0012472270528183318, "rouge2_precision": 0.027910124577921847, "rouge2_precision_stderr": 0.001217123854556207, "rouge2_recall": 0.10372904873293698, "rouge2_recall_stderr": 0.0027182548458276256, "rougeL_fmeasure": 0.08582932917108592, "rougeL_fmeasure_stderr": 0.0019694454130826677, "rougeL_precision": 0.06123190050383225, "rougeL_precision_stderr": 0.0018493227441710388, "rougeL_recall": 0.21837596562168848, "rougeL_recall_stderr": 0.00393427728058764, "rougeLsum_fmeasure": 0.08587389536022703, "rougeLsum_fmeasure_stderr": 0.0019923488945407276, "rougeLsum_precision": 0.061481184925116956, "rougeLsum_precision_stderr": 0.0018863336098335244, "rougeLsum_recall": 0.21760550986389007, "rougeLsum_recall_stderr": 0.003896708014396261}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 1.7570330415242193e-11, "bleu_stderr": 9.79306473294127e-11, "rouge1_fmeasure": 0.006073432028332952, "rouge1_fmeasure_stderr": 0.0006628007918517945, "rouge1_precision": 0.009001052783311397, "rouge1_precision_stderr": 0.0010831466573619046, "rouge1_recall": 0.006686272162881273, "rouge1_recall_stderr": 0.0007630575014863376, "rouge2_fmeasure": 0.0012136209926164965, "rouge2_fmeasure_stderr": 0.0002535102063017258, "rouge2_precision": 0.0023261262222902496, "rouge2_precision_stderr": 0.0005664528532742944, "rouge2_recall": 0.0013364635095880378, "rouge2_recall_stderr": 0.00031600635668507955, "rougeL_fmeasure": 0.005151805662253334, "rougeL_fmeasure_stderr": 0.000567191022951449, "rougeL_precision": 0.007740597546289337, "rougeL_precision_stderr": 0.0009578721435880855, "rougeL_recall": 0.005711288605340614, "rougeL_recall_stderr": 0.0006470161855133605, "rougeLsum_fmeasure": 0.005764216603488259, "rougeLsum_fmeasure_stderr": 0.0006312612920070436, "rougeLsum_precision": 0.008661280593754251, "rougeLsum_precision_stderr": 0.0010585074288308866, "rougeLsum_recall": 0.00629402279957811, "rougeLsum_recall_stderr": 0.000708473118264577}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 9.075675826005247, "bleu_stderr": 0.1653441614852989, "rouge1_fmeasure": 0.3655627727747465, "rouge1_fmeasure_stderr": 0.002179592031705995, "rouge1_precision": 0.43717237782424556, "rouge1_precision_stderr": 0.0032131153990395105, "rouge1_recall": 0.34970875370492777, "rouge1_recall_stderr": 0.002492021825922423, "rouge2_fmeasure": 0.15987755776745063, "rouge2_fmeasure_stderr": 0.001746307435561507, "rouge2_precision": 0.19475535986053671, "rouge2_precision_stderr": 0.002374332976786843, "rouge2_recall": 0.15278033886088896, "rouge2_recall_stderr": 0.0018229656355282099, "rougeL_fmeasure": 0.28581073763147397, "rougeL_fmeasure_stderr": 0.0018731672478296468, "rougeL_precision": 0.3431844847260117, "rougeL_precision_stderr": 0.002816860200252875, "rougeL_recall": 0.2738841977693453, "rougeL_recall_stderr": 0.0021185239782291246, "rougeLsum_fmeasure": 0.31334964121857656, "rougeLsum_fmeasure_stderr": 0.002093593744190019, "rougeLsum_precision": 0.3753378586075155, "rougeLsum_precision_stderr": 0.0030325254453903456, "rougeLsum_recall": 0.2998624179944231, "rougeLsum_recall_stderr": 0.0023395619025335535}}, "gem_xsum": {"article_DOC_summary": {"bleu": 4.562392577706544e-45, "bleu_stderr": 7.886084839478574e-39, "rouge1_fmeasure": 0.0029312590283442222, "rouge1_fmeasure_stderr": 0.0008258312479225911, "rouge1_precision": 0.003591191401452728, "rouge1_precision_stderr": 0.001020843909570341, "rouge1_recall": 0.0026658820558568995, "rouge1_recall_stderr": 0.0007715098804589186, "rouge2_fmeasure": 0.0005247055544583527, "rouge2_fmeasure_stderr": 0.00026825774796811777, "rouge2_precision": 0.0006103958934147612, "rouge2_precision_stderr": 0.0003006367876943161, "rouge2_recall": 0.0004676141468594298, "rouge2_recall_stderr": 0.0002456567053766645, "rougeL_fmeasure": 0.0023789038881630656, "rougeL_fmeasure_stderr": 0.0006611391677521927, "rougeL_precision": 0.00292309163386754, "rougeL_precision_stderr": 0.0008213440292977818, "rougeL_recall": 0.0021724670251771485, "rougeL_recall_stderr": 0.0006306676973825689, "rougeLsum_fmeasure": 0.0024716209620122146, "rougeLsum_fmeasure_stderr": 0.0006863922396788282, "rougeLsum_precision": 0.0030374426916148236, "rougeLsum_precision_stderr": 0.0008522737295493769, "rougeLsum_recall": 0.0022504336554593878, "rougeLsum_recall_stderr": 0.0006494473025743668}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json new file mode 100644 index 0000000000000000000000000000000000000000..4f3f817ca024821f75362d0445bd85021ceb82d2 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:21.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2273593088129955, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.023048422160974854 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06534546646292078, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0022653967634482346 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21043781234205539, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003996208208082882 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08670665367464021, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0020039073276409366 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.026820845156803847, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0014207849027619004 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09549460860341967, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026162181403760645 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.036699311161718554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011761982937247013 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.062094192289416324, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0021325935928020773 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.20451582032836602, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038946312490941164 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08296705397653721, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018647461030262618 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.062452077921763396, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0021917167381854613 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20334748275991352, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038281910048227016 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08287948300662376, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001877936799895705 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..e1344cefeb857a10cdb894c5f781774a7ef2668d --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2334282280458586, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.019052307328308617 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06664206812457203, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0022521713907757224 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22011552548283286, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00410978487191024 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08900921610644012, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0020657069941074835 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02742604267640331, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0012236389813676137 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10166270849980795, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026960826428279448 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03844075075586678, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011778262164477038 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.06259979613562147, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0020549501026142056 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21306622153871427, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003978348602912166 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0844769580786778, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018659122044865008 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.06293334235936807, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.002091916854143351 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21239997123355958, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003944956006147718 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.0845681547310993, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001889837918272996 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..51549418e3225ec32cea1d056ccb0db3b349f5ee --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.23704825822381498, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.027670400496745257 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06174874381710095, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018845302725776602 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21730574055132956, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00401363972509449 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08585565394877333, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.00199569997327382 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.025823917902136784, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009656291224127635 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09983273352200027, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002662212695695811 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03728553813713636, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00115452384616963 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.058172511770298356, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0016848170587776589 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21029648393397446, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003872029478285546 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0816882805637377, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0018106503439831681 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.058665437136628244, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0017389144610245677 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21049148580432653, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003861866479426883 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08201910289112235, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018386813277759032 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json new file mode 100644 index 0000000000000000000000000000000000000000..511c61b322bd593c4e8d98c0080e9ac6253f7c73 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.22386799285100242, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.025018167341179967 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.06270503422215706, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0018889830987665244 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22196309199959086, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003977028632566687 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08717467473249238, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001988496293821803 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02636193213072692, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0009948761975072144 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10182548952971911, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026075444055286934 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03760470124518723, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0011212849650132507 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.058872164432613124, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0016543717782211067 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2150079269966024, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038312866185628972 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08288446430861876, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0017917823158372869 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.059266742576785554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001700146105139763 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2146878788305741, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003813273422443642 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08313479823551378, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018229722576069702 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json new file mode 100644 index 0000000000000000000000000000000000000000..c942a5839fab41580afaf0df837a64028a6a84ce --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-web_nlg_en.templates=PALM_prompt.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2172353235676392, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.022164989159983352 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.065980949123901, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002147721445587754 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.226392298167293, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0041042887852570055 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.09099601606039612, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0022125069667482317 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.027910124577921847, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001217123854556207 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10372904873293698, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027182548458276256 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03929727146315665, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0012472270528183318 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.06123190050383225, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0018493227441710388 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21837596562168848, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.00393427728058764 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08582932917108592, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0019694454130826677 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.061481184925116956, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018863336098335244 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21760550986389007, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003896708014396261 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08587389536022703, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019923488945407276 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json new file mode 100644 index 0000000000000000000000000000000000000000..18fa8adb652876ea3edfa7f3993eee9953e24b0b --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:15.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.11957570551320973, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0017122678910133295 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1255131562764684, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018776761004921133 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.1065060745573131, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001343010186487139 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.008514681910363194, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005650521915479279 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.009948502518022673, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000628599205824894 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.007682465512529915, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004379081883445011 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.09608447326527465, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013633991908894674 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.10091203512154058, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014635688317212099 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08492259997972296, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.000992809060772017 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.11463279913059665, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016245587467926714 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.12015283120807896, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0017669328759193385 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.1019746781540952, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012588460161316543 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5846163931718847, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05687316221546765 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..f566b0cfc1acec6a81c35c9c3b5bfb9e56512fb9 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.13646669155030028, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00220310186554878 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14546845977536896, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002185736272211402 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.12041833327196463, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001596604081334374 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.01724386477933621, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008686647411341404 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.01912476010235633, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000862139084747869 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.01499749394448183, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000625100665952798 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.11051635705738314, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001756238864615761 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11966135440554557, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0017926753508674628 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09741475108454614, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001214366654297324 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.12908957637877216, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0020679024538660115 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.13765696723373635, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0020581929100874927 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.11380484706597256, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001488577909873337 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.1784483013564102, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.06904095646323807 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..a5b2795cdc5c91d6b50245e35bff4ac673112f53 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.13009014582477282, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0027117628648043474 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.12804528790016462, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002344428985321143 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10566275122482133, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001743438719102012 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.01992958977730361, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.001100438026092398 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.018863080954515433, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009086120229391634 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014741970258181074, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0006322524751793364 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.10732457844198273, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0022127410001868135 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.10756202262646183, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001971425139131646 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08739180884548453, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013975216677133634 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.12199094138547152, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0025385358079340996 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.12012956265605543, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002193175745267735 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09896289382222027, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0016138864108605157 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.0733305687102461, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.07881693779240048 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json new file mode 100644 index 0000000000000000000000000000000000000000..5c3381fd85626c13c12dfcade4eebbc01b27cb02 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:56.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.04847824168899397, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0021527306994951743 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.04290155774449955, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0017685252209751602 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.03692896653093653, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001444978729387553 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00916634461711251, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0008793312023838661 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.007207669889606505, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006015119954053042 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.006267683793955542, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005005058667584872 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.04072462596685456, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001826606480591984 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.03615887229029135, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014729717928229518 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.03090192793357971, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001194549842708322 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.04542034910903916, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0020204702947843995 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.039970284683949656, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0016443164447444867 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03442247041414027, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013400879274452296 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.09489512058813623, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.015019543669611902 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json new file mode 100644 index 0000000000000000000000000000000000000000..7d0aaba159f4e052ab45d8b09849840d7f7f7390 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=GEM-wiki_lingua_en.templates=tldr_en.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:49:35.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.009001052783311397, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010831466573619046 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.006686272162881273, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0007630575014863376 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.006073432028332952, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006628007918517945 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0023261262222902496, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005664528532742944 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0013364635095880378, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00031600635668507955 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0012136209926164965, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0002535102063017258 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.007740597546289337, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009578721435880855 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.005711288605340614, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0006470161855133605 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.005151805662253334, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.000567191022951449 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.008661280593754251, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010585074288308866 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.00629402279957811, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.000708473118264577 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.005764216603488259, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0006312612920070436 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.7570330415242193e-11, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 9.79306473294127e-11 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json new file mode 100644 index 0000000000000000000000000000000000000000..1db0a332896cd870392e20d1317fbd2e423796d8 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 6.473555746711545, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11991319165714288 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.40761719747303815, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029713394877934557 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.32622915467474406, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025105159843268037 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.34215561381708015, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021672296322155556 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.15409023153796192, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002113014573746519 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.12084762554962568, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016765309750084614 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.12725114846534297, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.00164076771900096 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.30083315187334836, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002499830888463013 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.23875130433521696, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019821323260998534 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.25088591074914646, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017699934884865026 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.33648017824876714, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0027477983497877084 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.26827573657482673, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022410349466470796 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.28175550985457437, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020169711983159056 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..5b5fe84583f9504864f1af0d9d625c59239203f5 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 7.633962606953475, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11159543603672754 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4202208020040644, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003044005246951365 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.3432310437592059, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025043229759053897 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.356668169483382, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021607608835383096 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.17155302692760083, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0021420775665914075 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.1381486339251807, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017426260823225216 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.14338933259648326, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016391755775913735 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3143755923201533, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002563069353077956 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.25646642804127656, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020573152371357105 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2659375155003453, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017894307181340716 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.349974374493684, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0028149623026241734 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2856269773413544, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002309169188218799 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.29672587998555633, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002046175837266347 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..ffbfdfc2c1a869912d51b3baeac5269d2eff096f --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 8.542989864092585, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1433846377318832 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4336285483775792, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030892429977636836 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.34827761893603293, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025314100460238995 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.363850648547613, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021839425460877635 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.18892045173233854, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00228916384509369 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.14857093701220483, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017945593778511408 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.15547063329696245, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001703404947537638 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.33571339839716463, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002715659831835022 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.26744895745664315, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0020800046877521064 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.27970257938801435, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018380493665157282 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.36855797100423715, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029209189793231666 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2947888159986677, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023280241517856827 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.308271697646505, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0020717492761956327 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..a3cbdc8bbd49d6060d390d6cba656acf429c57b6 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 8.942155482913693, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.16097618995677943 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.43622486474795197, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003180231455917317 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.34794080588594495, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025302437862891095 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.36393958053094144, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022056875684752482 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.19328105202965348, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0023520595209739854 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.1512965958463667, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018449526665716308 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.15830475389068993, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017490360106332174 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3379579590729962, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002768090228521333 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.26863807145078394, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.00211887791687035 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.28058628929067725, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.001866861403319837 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3722310719989836, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.003015809692521337 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.29631987882427524, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023825505361464985 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.30998668589677814, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002129216424796756 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..d4eed3f105ed79a20c9ae347f9dd16bf4e7c644d --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=e2e_nlg_cleaned.templates=generate_text_restaurant.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 9.075675826005247, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1653441614852989 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.43717237782424556, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0032131153990395105 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.34970875370492777, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002492021825922423 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.3655627727747465, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002179592031705995 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.19475535986053671, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002374332976786843 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.15278033886088896, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018229656355282099 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.15987755776745063, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001746307435561507 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3431844847260117, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.002816860200252875 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.2738841977693453, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0021185239782291246 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.28581073763147397, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018731672478296468 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3753378586075155, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0030325254453903456 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2998624179944231, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023395619025335535 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.31334964121857656, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002093593744190019 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json new file mode 100644 index 0000000000000000000000000000000000000000..dd2ad4029867774f35ebf465cd80586fb4961deb --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=1.batchsize=16.seed=1234.timestamp=2023-01-22T19:19:14.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.12212339174292709, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.00227587516471315 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21119172510668496, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0038426678932757265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.14414241409467604, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002375277032964203 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.015557546867072657, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0009185461625331802 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03139695878394972, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017910321872932162 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.019599702192861963, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011047188456746039 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.09647648601466811, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0016587561042236644 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.1698329781218389, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002988542446170774 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11454550827536823, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001745331955977972 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.09487372714642431, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017364409448969846 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16552982169201422, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0031000761747263053 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.1122073254536097, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018494671901998726 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9731159077387963, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.11074536852276136 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..03142e061982d2043c1f08ad2fee432012f174b1 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=2.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.12823223548348878, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0025565036587658725 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2143351214598643, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0036128802474860813 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1474750330894204, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002352105574798546 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.017523161513321936, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0009922032751105664 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03274318045581887, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017433402857737756 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.02113796673167594, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011185423587926665 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.10370089292267248, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001970850041369119 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.17671366292027554, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0028713475720421144 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.12005256994367001, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017848482742927902 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.10049270031173661, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002030546802009858 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16875347685809536, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002920858053558713 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11549675473891775, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018543831002685392 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.8946101579944589, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.11447862558731768 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json new file mode 100644 index 0000000000000000000000000000000000000000..78c3478c064538be300e3edd8440bee911363885 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=3.batchsize=16.seed=1234.timestamp=2023-01-24T16:59:52.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.13560916718487, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0030925128958130092 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.1989284367405898, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0037001652881557775 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.14593815028467524, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0026868363513368075 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.019698420478264804, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0012604026842234879 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.030787306962827925, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001718481660739863 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.021594887074556493, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0011983258613855398 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.10928242080406224, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.002413360587361133 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.16472649296075753, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.00306995614023579 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11855231312063658, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.002059391483708031 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.10597385450714689, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.002479253447768338 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.15617920045628825, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0030070719552520047 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11374940769267439, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.002097072555940818 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9245086188793217, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.14099807544420506 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..e5fb90289cc8a9c9d0a4cc678c5be700fa0cd819 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=4.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.037913585028776065, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0023001502602127523 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.04985852283454599, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003059874154622636 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.03937861378098916, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002315856820827257 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0049564518309591745, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0005552167801086178 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.00808101754829496, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009432130706742604 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005653435053950428, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006134888531248025 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.03066492611348946, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0018517687252350952 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0409686857380869, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0025238815188420474 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.031970816594020374, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0018597075082346828 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.030346836341775464, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018562425966080272 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.04007296257687896, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.00249432492023191 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.03145276305797317, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0018465561902809033 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.1945677197809648, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.04427282782106186 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json new file mode 100644 index 0000000000000000000000000000000000000000..5347004a3ad5f802b539050c409d07542aebbe5d --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.limited=3000.model=lm1-146m-14b.task=gem_xsum.templates=article_DOC_summary.fewshot=5.batchsize=16.seed=1234.timestamp=2023-01-24T15:52:02.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.003591191401452728, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.001020843909570341 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0026658820558568995, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0007715098804589186 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0029312590283442222, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0008258312479225911 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0006103958934147612, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0003006367876943161 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0004676141468594298, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0002456567053766645 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0005247055544583527, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.00026825774796811777 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.00292309163386754, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0008213440292977818 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0021724670251771485, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0006306676973825689 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0023789038881630656, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0006611391677521927 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0030374426916148236, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0008522737295493769 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0022504336554593878, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0006494473025743668 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0024716209620122146, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0006863922396788282 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 4.562392577706544e-45, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 7.886084839478574e-39 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv b/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..ecf619ce074e921eb7ae4e638f0c78e3e5c2eb83 --- /dev/null +++ b/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.32,0.014758652303574876,0 +anli_r2,acc,0.334,0.014922019523732963,0 +anli_r3,acc,0.32916666666666666,0.013570806258433633,0 +arc_challenge,acc,0.15870307167235495,0.010677974278076945,0 +arc_challenge,acc_norm,0.2158703071672355,0.012022975360030665,0 +arc_easy,acc,0.37415824915824913,0.009929516948977625,0 +arc_easy,acc_norm,0.3707912457912458,0.009911292822056923,0 +boolq,acc,0.5944954128440367,0.008587459055441612,1 +cb,acc,0.35714285714285715,0.0646095738380922,1 +cb,f1,0.24356089022255564,,1 +copa,acc,0.62,0.048783173121456316,0 +hellaswag,acc,0.26837283409679347,0.004422070927212532,0 +hellaswag,acc_norm,0.27982473610834496,0.004479955169853626,0 +piqa,acc,0.5843307943416758,0.011498699770894783,0 +piqa,acc_norm,0.5897714907508161,0.011476256036359114,0 +rte,acc,0.51985559566787,0.030072723167317184,0 +sciq,acc,0.736,0.013946271849440472,0 +sciq,acc_norm,0.691,0.014619600977206486,0 +storycloze_2016,acc,0.5462319615179049,0.01151289919986303,0 +winogrande,acc,0.5169692186266772,0.014044390401612981,0 diff --git a/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json b/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json new file mode 100644 index 0000000000000000000000000000000000000000..8b863355c805992a1bab7575203b54a8c67125ca --- /dev/null +++ b/146m14b14b/evaluation/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.32, + "acc_stderr": 0.014758652303574876 + }, + "anli_r2": { + "acc": 0.334, + "acc_stderr": 0.014922019523732963 + }, + "anli_r3": { + "acc": 0.32916666666666666, + "acc_stderr": 0.013570806258433633 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.0646095738380922, + "f1": 0.24356089022255564 + }, + "copa": { + "acc": 0.62, + "acc_stderr": 0.048783173121456316 + }, + "hellaswag": { + "acc": 0.26837283409679347, + "acc_stderr": 0.004422070927212532, + "acc_norm": 0.27982473610834496, + "acc_norm_stderr": 0.004479955169853626 + }, + "rte": { + "acc": 0.51985559566787, + "acc_stderr": 0.030072723167317184 + }, + "winogrande": { + "acc": 0.5169692186266772, + "acc_stderr": 0.014044390401612981 + }, + "storycloze_2016": { + "acc": 0.5462319615179049, + "acc_stderr": 0.01151289919986303 + }, + "boolq": { + "acc": 0.5944954128440367, + "acc_stderr": 0.008587459055441612 + }, + "arc_easy": { + "acc": 0.37415824915824913, + "acc_stderr": 0.009929516948977625, + "acc_norm": 0.3707912457912458, + "acc_norm_stderr": 0.009911292822056923 + }, + "arc_challenge": { + "acc": 0.15870307167235495, + "acc_stderr": 0.010677974278076945, + "acc_norm": 0.2158703071672355, + "acc_norm_stderr": 0.012022975360030665 + }, + "sciq": { + "acc": 0.736, + "acc_stderr": 0.013946271849440472, + "acc_norm": 0.691, + "acc_norm_stderr": 0.014619600977206486 + }, + "piqa": { + "acc": 0.5843307943416758, + "acc_stderr": 0.011498699770894783, + "acc_norm": 0.5897714907508161, + "acc_norm_stderr": 0.011476256036359114 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..cc381d185b32f844f4ee173a651311733d61c381 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.326,0.014830507204541037,0 +anli_r2,acc,0.356,0.01514904265930662,0 +anli_r3,acc,0.3475,0.013751753243291852,0 +arc_challenge,acc,0.17406143344709898,0.0110801771294822,0 +arc_challenge,acc_norm,0.2175767918088737,0.012057262020972504,0 +arc_easy,acc,0.39436026936026936,0.010028176038392992,0 +arc_easy,acc_norm,0.3573232323232323,0.009833205612463125,0 +boolq,acc,0.608868501529052,0.008535239054221164,1 +cb,acc,0.4107142857142857,0.0663363415035954,1 +cb,f1,0.2134646962233169,,1 +copa,acc,0.64,0.04824181513244218,0 +hellaswag,acc,0.26598287193786097,0.004409521343140108,0 +hellaswag,acc_norm,0.27633937462656843,0.00446272754305589,0 +piqa,acc,0.5914036996735582,0.011469240387245143,0 +piqa,acc_norm,0.5935799782372143,0.01145968265012358,0 +rte,acc,0.5270758122743683,0.030052303463143706,0 +sciq,acc,0.71,0.014356395999905689,0 +sciq,acc_norm,0.62,0.01535694747779758,0 +storycloze_2016,acc,0.5467664350614645,0.011511744771088352,0 +winogrande,acc,0.5114443567482242,0.014048804199859329,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.json new file mode 100644 index 0000000000000000000000000000000000000000..d1dbff93312b2b6d50c4ffcb037931e914bc02bf --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-22-18-55-49_0shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.326, + "acc_stderr": 0.014830507204541037 + }, + "anli_r2": { + "acc": 0.356, + "acc_stderr": 0.01514904265930662 + }, + "anli_r3": { + "acc": 0.3475, + "acc_stderr": 0.013751753243291852 + }, + "cb": { + "acc": 0.4107142857142857, + "acc_stderr": 0.0663363415035954, + "f1": 0.2134646962233169 + }, + "copa": { + "acc": 0.64, + "acc_stderr": 0.04824181513244218 + }, + "hellaswag": { + "acc": 0.26598287193786097, + "acc_stderr": 0.004409521343140108, + "acc_norm": 0.27633937462656843, + "acc_norm_stderr": 0.00446272754305589 + }, + "rte": { + "acc": 0.5270758122743683, + "acc_stderr": 0.030052303463143706 + }, + "winogrande": { + "acc": 0.5114443567482242, + "acc_stderr": 0.014048804199859329 + }, + "storycloze_2016": { + "acc": 0.5467664350614645, + "acc_stderr": 0.011511744771088352 + }, + "boolq": { + "acc": 0.608868501529052, + "acc_stderr": 0.008535239054221164 + }, + "arc_easy": { + "acc": 0.39436026936026936, + "acc_stderr": 0.010028176038392992, + "acc_norm": 0.3573232323232323, + "acc_norm_stderr": 0.009833205612463125 + }, + "arc_challenge": { + "acc": 0.17406143344709898, + "acc_stderr": 0.0110801771294822, + "acc_norm": 0.2175767918088737, + "acc_norm_stderr": 0.012057262020972504 + }, + "sciq": { + "acc": 0.71, + "acc_stderr": 0.014356395999905689, + "acc_norm": 0.62, + "acc_norm_stderr": 0.01535694747779758 + }, + "piqa": { + "acc": 0.5914036996735582, + "acc_stderr": 0.011469240387245143, + "acc_norm": 0.5935799782372143, + "acc_norm_stderr": 0.01145968265012358 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..eca8da7f2cc09607b0084cd284eb93c079926924 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.331,0.014888272588203936,0 +anli_r2,acc,0.327,0.01484221315341124,0 +anli_r3,acc,0.3466666666666667,0.013744022550571954,0 +arc_challenge,acc,0.17406143344709898,0.011080177129482201,0 +arc_challenge,acc_norm,0.2158703071672355,0.012022975360030668,0 +arc_easy,acc,0.3859427609427609,0.009989277329503951,0 +arc_easy,acc_norm,0.35395622895622897,0.009812370644174414,0 +boolq,acc,0.5737003058103975,0.008649531625805671,1 +cb,acc,0.3392857142857143,0.06384226561930825,1 +cb,f1,0.23757309941520469,,1 +copa,acc,0.63,0.04852365870939099,0 +hellaswag,acc,0.2673770165305716,0.004416861919101004,0 +hellaswag,acc_norm,0.2784305915156343,0.004473104537026919,0 +piqa,acc,0.5957562568008705,0.011449891763007466,0 +piqa,acc_norm,0.5946681175190425,0.01145481638734678,0 +rte,acc,0.5740072202166066,0.029764956741777645,0 +sciq,acc,0.721,0.014190150117612032,0 +sciq,acc_norm,0.675,0.014818724459095524,0 +storycloze_2016,acc,0.547300908605024,0.011510576955232206,0 +winogrande,acc,0.5067087608524072,0.014051220692330349,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.json new file mode 100644 index 0000000000000000000000000000000000000000..75f7abac8c13554ceddd04fc3e90563ba49954fb --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-23-19-58-52_1shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.331, + "acc_stderr": 0.014888272588203936 + }, + "anli_r2": { + "acc": 0.327, + "acc_stderr": 0.01484221315341124 + }, + "anli_r3": { + "acc": 0.3466666666666667, + "acc_stderr": 0.013744022550571954 + }, + "cb": { + "acc": 0.3392857142857143, + "acc_stderr": 0.06384226561930825, + "f1": 0.23757309941520469 + }, + "copa": { + "acc": 0.63, + "acc_stderr": 0.04852365870939099 + }, + "hellaswag": { + "acc": 0.2673770165305716, + "acc_stderr": 0.004416861919101004, + "acc_norm": 0.2784305915156343, + "acc_norm_stderr": 0.004473104537026919 + }, + "rte": { + "acc": 0.5740072202166066, + "acc_stderr": 0.029764956741777645 + }, + "winogrande": { + "acc": 0.5067087608524072, + "acc_stderr": 0.014051220692330349 + }, + "storycloze_2016": { + "acc": 0.547300908605024, + "acc_stderr": 0.011510576955232206 + }, + "boolq": { + "acc": 0.5737003058103975, + "acc_stderr": 0.008649531625805671 + }, + "arc_easy": { + "acc": 0.3859427609427609, + "acc_stderr": 0.009989277329503951, + "acc_norm": 0.35395622895622897, + "acc_norm_stderr": 0.009812370644174414 + }, + "arc_challenge": { + "acc": 0.17406143344709898, + "acc_stderr": 0.011080177129482201, + "acc_norm": 0.2158703071672355, + "acc_norm_stderr": 0.012022975360030668 + }, + "sciq": { + "acc": 0.721, + "acc_stderr": 0.014190150117612032, + "acc_norm": 0.675, + "acc_norm_stderr": 0.014818724459095524 + }, + "piqa": { + "acc": 0.5957562568008705, + "acc_stderr": 0.011449891763007466, + "acc_norm": 0.5946681175190425, + "acc_norm_stderr": 0.01145481638734678 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..ecf619ce074e921eb7ae4e638f0c78e3e5c2eb83 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.32,0.014758652303574876,0 +anli_r2,acc,0.334,0.014922019523732963,0 +anli_r3,acc,0.32916666666666666,0.013570806258433633,0 +arc_challenge,acc,0.15870307167235495,0.010677974278076945,0 +arc_challenge,acc_norm,0.2158703071672355,0.012022975360030665,0 +arc_easy,acc,0.37415824915824913,0.009929516948977625,0 +arc_easy,acc_norm,0.3707912457912458,0.009911292822056923,0 +boolq,acc,0.5944954128440367,0.008587459055441612,1 +cb,acc,0.35714285714285715,0.0646095738380922,1 +cb,f1,0.24356089022255564,,1 +copa,acc,0.62,0.048783173121456316,0 +hellaswag,acc,0.26837283409679347,0.004422070927212532,0 +hellaswag,acc_norm,0.27982473610834496,0.004479955169853626,0 +piqa,acc,0.5843307943416758,0.011498699770894783,0 +piqa,acc_norm,0.5897714907508161,0.011476256036359114,0 +rte,acc,0.51985559566787,0.030072723167317184,0 +sciq,acc,0.736,0.013946271849440472,0 +sciq,acc_norm,0.691,0.014619600977206486,0 +storycloze_2016,acc,0.5462319615179049,0.01151289919986303,0 +winogrande,acc,0.5169692186266772,0.014044390401612981,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json new file mode 100644 index 0000000000000000000000000000000000000000..8b863355c805992a1bab7575203b54a8c67125ca --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-53-29_2shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.32, + "acc_stderr": 0.014758652303574876 + }, + "anli_r2": { + "acc": 0.334, + "acc_stderr": 0.014922019523732963 + }, + "anli_r3": { + "acc": 0.32916666666666666, + "acc_stderr": 0.013570806258433633 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.0646095738380922, + "f1": 0.24356089022255564 + }, + "copa": { + "acc": 0.62, + "acc_stderr": 0.048783173121456316 + }, + "hellaswag": { + "acc": 0.26837283409679347, + "acc_stderr": 0.004422070927212532, + "acc_norm": 0.27982473610834496, + "acc_norm_stderr": 0.004479955169853626 + }, + "rte": { + "acc": 0.51985559566787, + "acc_stderr": 0.030072723167317184 + }, + "winogrande": { + "acc": 0.5169692186266772, + "acc_stderr": 0.014044390401612981 + }, + "storycloze_2016": { + "acc": 0.5462319615179049, + "acc_stderr": 0.01151289919986303 + }, + "boolq": { + "acc": 0.5944954128440367, + "acc_stderr": 0.008587459055441612 + }, + "arc_easy": { + "acc": 0.37415824915824913, + "acc_stderr": 0.009929516948977625, + "acc_norm": 0.3707912457912458, + "acc_norm_stderr": 0.009911292822056923 + }, + "arc_challenge": { + "acc": 0.15870307167235495, + "acc_stderr": 0.010677974278076945, + "acc_norm": 0.2158703071672355, + "acc_norm_stderr": 0.012022975360030665 + }, + "sciq": { + "acc": 0.736, + "acc_stderr": 0.013946271849440472, + "acc_norm": 0.691, + "acc_norm_stderr": 0.014619600977206486 + }, + "piqa": { + "acc": 0.5843307943416758, + "acc_stderr": 0.011498699770894783, + "acc_norm": 0.5897714907508161, + "acc_norm_stderr": 0.011476256036359114 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..523028a32921caa4ce72311ce387b20246c94fae --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.353,0.015120172605483708,0 +anli_r2,acc,0.353,0.015120172605483692,0 +anli_r3,acc,0.345,0.013728421539454876,0 +arc_challenge,acc,0.16638225255972697,0.010883248065964142,0 +arc_challenge,acc_norm,0.2090443686006826,0.011882746987406457,0 +arc_easy,acc,0.38804713804713803,0.009999295905750659,0 +arc_easy,acc_norm,0.35648148148148145,0.009828046544504424,0 +boolq,acc,0.6091743119266055,0.00853404406545651,1 +cb,acc,0.4107142857142857,0.06633634150359541,1 +cb,f1,0.28296670272869334,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.2698665604461263,0.0044298311529146804,0 +hellaswag,acc_norm,0.2778331009759012,0.004470152081675125,0 +piqa,acc,0.5870511425462459,0.011487658725079095,0 +piqa,acc_norm,0.5799782372143635,0.011515615810587486,0 +rte,acc,0.516245487364621,0.030080573208738064,0 +sciq,acc,0.748,0.013736254390651145,0 +sciq,acc_norm,0.708,0.014385511563477345,0 +storycloze_2016,acc,0.547300908605024,0.011510576955232206,0 +winogrande,acc,0.5114443567482242,0.014048804199859329,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.json new file mode 100644 index 0000000000000000000000000000000000000000..5f11ad260ec4575a2216ab34817151119a1a2ba3 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-03_4shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.353, + "acc_stderr": 0.015120172605483708 + }, + "anli_r2": { + "acc": 0.353, + "acc_stderr": 0.015120172605483692 + }, + "anli_r3": { + "acc": 0.345, + "acc_stderr": 0.013728421539454876 + }, + "cb": { + "acc": 0.4107142857142857, + "acc_stderr": 0.06633634150359541, + "f1": 0.28296670272869334 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.2698665604461263, + "acc_stderr": 0.0044298311529146804, + "acc_norm": 0.2778331009759012, + "acc_norm_stderr": 0.004470152081675125 + }, + "rte": { + "acc": 0.516245487364621, + "acc_stderr": 0.030080573208738064 + }, + "winogrande": { + "acc": 0.5114443567482242, + "acc_stderr": 0.014048804199859329 + }, + "storycloze_2016": { + "acc": 0.547300908605024, + "acc_stderr": 0.011510576955232206 + }, + "boolq": { + "acc": 0.6091743119266055, + "acc_stderr": 0.00853404406545651 + }, + "arc_easy": { + "acc": 0.38804713804713803, + "acc_stderr": 0.009999295905750659, + "acc_norm": 0.35648148148148145, + "acc_norm_stderr": 0.009828046544504424 + }, + "arc_challenge": { + "acc": 0.16638225255972697, + "acc_stderr": 0.010883248065964142, + "acc_norm": 0.2090443686006826, + "acc_norm_stderr": 0.011882746987406457 + }, + "sciq": { + "acc": 0.748, + "acc_stderr": 0.013736254390651145, + "acc_norm": 0.708, + "acc_norm_stderr": 0.014385511563477345 + }, + "piqa": { + "acc": 0.5870511425462459, + "acc_stderr": 0.011487658725079095, + "acc_norm": 0.5799782372143635, + "acc_norm_stderr": 0.011515615810587486 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..98f81d24f5a4b269d218bf9d00fc17c549ff8787 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.319,0.014746404865473472,0 +anli_r2,acc,0.348,0.01507060460376841,0 +anli_r3,acc,0.3416666666666667,0.013696658778002512,0 +arc_challenge,acc,0.17406143344709898,0.011080177129482203,0 +arc_challenge,acc_norm,0.2090443686006826,0.011882746987406451,0 +arc_easy,acc,0.382996632996633,0.00997492038453648,0 +arc_easy,acc_norm,0.36363636363636365,0.009870849346011776,0 +boolq,acc,0.6039755351681957,0.008553881336813417,1 +cb,acc,0.35714285714285715,0.0646095738380922,1 +cb,f1,0.23561507936507933,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.2704640509858594,0.004432917403755055,0 +hellaswag,acc_norm,0.27853017327225654,0.004473595650807676,0 +piqa,acc,0.5957562568008705,0.011449891763007466,0 +piqa,acc_norm,0.588683351468988,0.011480860577192815,0 +rte,acc,0.5342960288808665,0.030025579819366426,0 +sciq,acc,0.718,0.014236526215291334,0 +sciq,acc_norm,0.694,0.014580006055436965,0 +storycloze_2016,acc,0.5483698556921432,0.011508201145928354,0 +winogrande,acc,0.5232833464877664,0.014037241309573642,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.json new file mode 100644 index 0000000000000000000000000000000000000000..fd636b015a2488167aca408348629bf41333202d --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-13-57-04_3shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.319, + "acc_stderr": 0.014746404865473472 + }, + "anli_r2": { + "acc": 0.348, + "acc_stderr": 0.01507060460376841 + }, + "anli_r3": { + "acc": 0.3416666666666667, + "acc_stderr": 0.013696658778002512 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.0646095738380922, + "f1": 0.23561507936507933 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.2704640509858594, + "acc_stderr": 0.004432917403755055, + "acc_norm": 0.27853017327225654, + "acc_norm_stderr": 0.004473595650807676 + }, + "rte": { + "acc": 0.5342960288808665, + "acc_stderr": 0.030025579819366426 + }, + "winogrande": { + "acc": 0.5232833464877664, + "acc_stderr": 0.014037241309573642 + }, + "storycloze_2016": { + "acc": 0.5483698556921432, + "acc_stderr": 0.011508201145928354 + }, + "boolq": { + "acc": 0.6039755351681957, + "acc_stderr": 0.008553881336813417 + }, + "arc_easy": { + "acc": 0.382996632996633, + "acc_stderr": 0.00997492038453648, + "acc_norm": 0.36363636363636365, + "acc_norm_stderr": 0.009870849346011776 + }, + "arc_challenge": { + "acc": 0.17406143344709898, + "acc_stderr": 0.011080177129482203, + "acc_norm": 0.2090443686006826, + "acc_norm_stderr": 0.011882746987406451 + }, + "sciq": { + "acc": 0.718, + "acc_stderr": 0.014236526215291334, + "acc_norm": 0.694, + "acc_norm_stderr": 0.014580006055436965 + }, + "piqa": { + "acc": 0.5957562568008705, + "acc_stderr": 0.011449891763007466, + "acc_norm": 0.588683351468988, + "acc_norm_stderr": 0.011480860577192815 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.csv b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.csv new file mode 100644 index 0000000000000000000000000000000000000000..888772ef1f2d1ca262a67cb8f6b2d0b7caf51e06 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.343,0.015019206922356951,0 +anli_r2,acc,0.338,0.014965960710224487,0 +anli_r3,acc,0.3333333333333333,0.013613950010225598,0 +arc_challenge,acc,0.1680887372013652,0.01092771504612486,0 +arc_challenge,acc_norm,0.21416382252559726,0.011988383205966494,0 +arc_easy,acc,0.3968855218855219,0.010039236800583202,0 +arc_easy,acc_norm,0.36784511784511786,0.009894923464455196,0 +boolq,acc,0.609480122324159,0.008532845556631466,1 +cb,acc,0.5178571428571429,0.06737697508644648,1 +cb,f1,0.3607787942763312,,1 +copa,acc,0.62,0.048783173121456316,0 +hellaswag,acc,0.26976697868950406,0.004429315788310528,0 +hellaswag,acc_norm,0.2778331009759012,0.004470152081675125,0 +piqa,acc,0.5908596300326442,0.011471593460443316,0 +piqa,acc_norm,0.588683351468988,0.011480860577192817,0 +rte,acc,0.5667870036101083,0.029826764082138264,0 +sciq,acc,0.737,0.013929286594259715,0 +sciq,acc_norm,0.715,0.014282120955200478,0 +storycloze_2016,acc,0.5547835382148584,0.01149281951929236,0 +winogrande,acc,0.5240726124704025,0.014036189665395125,0 diff --git a/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.json b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.json new file mode 100644 index 0000000000000000000000000000000000000000..2454dd332b96d91732b7c17aaef9bcc67a9cc328 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/lm1-146m-14b-results_lm-eval_global_step21553_2023-01-24-17-05-33_5shots.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.343, + "acc_stderr": 0.015019206922356951 + }, + "anli_r2": { + "acc": 0.338, + "acc_stderr": 0.014965960710224487 + }, + "anli_r3": { + "acc": 0.3333333333333333, + "acc_stderr": 0.013613950010225598 + }, + "cb": { + "acc": 0.5178571428571429, + "acc_stderr": 0.06737697508644648, + "f1": 0.3607787942763312 + }, + "copa": { + "acc": 0.62, + "acc_stderr": 0.048783173121456316 + }, + "hellaswag": { + "acc": 0.26976697868950406, + "acc_stderr": 0.004429315788310528, + "acc_norm": 0.2778331009759012, + "acc_norm_stderr": 0.004470152081675125 + }, + "rte": { + "acc": 0.5667870036101083, + "acc_stderr": 0.029826764082138264 + }, + "winogrande": { + "acc": 0.5240726124704025, + "acc_stderr": 0.014036189665395125 + }, + "storycloze_2016": { + "acc": 0.5547835382148584, + "acc_stderr": 0.01149281951929236 + }, + "boolq": { + "acc": 0.609480122324159, + "acc_stderr": 0.008532845556631466 + }, + "arc_easy": { + "acc": 0.3968855218855219, + "acc_stderr": 0.010039236800583202, + "acc_norm": 0.36784511784511786, + "acc_norm_stderr": 0.009894923464455196 + }, + "arc_challenge": { + "acc": 0.1680887372013652, + "acc_stderr": 0.01092771504612486, + "acc_norm": 0.21416382252559726, + "acc_norm_stderr": 0.011988383205966494 + }, + "sciq": { + "acc": 0.737, + "acc_stderr": 0.013929286594259715, + "acc_norm": 0.715, + "acc_norm_stderr": 0.014282120955200478 + }, + "piqa": { + "acc": 0.5908596300326442, + "acc_stderr": 0.011471593460443316, + "acc_norm": 0.588683351468988, + "acc_norm_stderr": 0.011480860577192817 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0539c73ac43c2524150b6d0a94be72a7d562b8ea --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9586b675a5842d3d99bb3579d40f28bf26893b6016d5f84b3b2c83148629074 +size 27478295 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbf56e893278ba939ff98738a45925ec0d126fa5 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0a43c2869d6666c2bfaa0d62c6df79266d83b36d6793f181dfa7c937c46154 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2ca943a438ba950808fe8488235d2c578392d49 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0dc4be8959c9071dc0955e77b3eab1838b3b5f6bd44455d8b993ed558a6714 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b95928d37f8d8719946e089ad57b3634e81189f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822431276d89461ffa60388de911d415f99f80cf95dd69e681229733e841391c +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fec3e480575bab776257c671f951dde0b5e57cc3 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aaa90028239866b878c80f406bcbf1ec2bf875d0c2aea9ed2cc5caa2bbeadd5 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4816d8b28c89f86fccec88efc05f615bfa7d7058 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8c92cecd19e735bfea6f3437dd70d05c16830fc3b1d6afc412bae59dbfa2e6 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc65f8aedf5bbbac415cdd3291e71e39d952f1a5 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482d4cffecd5229c4fec5914fec2c685ffaf69f8241ec4d39fb17ed7a2ba57de +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5be2dd3e83fff16853a627d70cf8a88805048db --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d0da3c21bb948d5d94f28989573b82140ad88909709122d4737fe898646875 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ea7ce6eec8e210ffee68320dd8b5587e624f1b4 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7abdd4e055d44b12e0a972079a8244fa37b50754db27b7eeb6ddeb8f8906716 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b678fa67f2d96aa8cf72388e070921cc3826c2b --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c40fd87709a558c70e3231a5fbc1ee97745b962cb9af91af381a26c3c7495e +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6e470512991cf8a0bb7945d5302ff9077840e91 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374b6e70d89c41cbe948271cb9269e0e4bab730f4d0941a972791a5577452099 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3d6e6702f80105cd71a89289df8bd217b8eba18 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89ff817796fb36f7ce2041da51896089878d5aa478857af51e739b9de4e11fa +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4ce146a5aec03e3c0494d5f2fcc828def0d7e84 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f07ab2b5d39bbdfd61e592dfbe59b0bb95021c9a01effc5da8a0391a306a236 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..271cdcebd71454a9dffc98239a96bd62a67fc742 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47990d5ed8d82f86e1ff014ba57cea7431da318335900def4d687527131db686 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..11b0536f90e9570aad9216b3bf933b519aea5b56 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545bdda96850952aec780777abfd2c640fa33814c35efec066b2b82d8349d4e0 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..836b89d184eb9bc6091cdab2e029b5fdf9cc2565 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b19058ab00cfa204cac6e0e9b336d7cfdaafe8e8e7ef9797cf1ad699b7df9d +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c9ef9421580b721cc86a763c9ce79edc16e3110 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791f472c0002a3eb53e2955bcd6abbd8ce522ac8cd8b9387e28cc85b5fbaa776 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8bae0ba0358547d4a8063c2a911b13295f7c539 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29209894d5fec5a0b772bf7e74f328a81d03c95a3dd24b3ab7b3ea392e4229ef +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12990c9031c6cbbbca947e9f5f8bb7967752f070 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1312898e9d522967afc35b957db83e95d0d3e77ee0044c160735bf74889d77 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..23be9852e78faf1c1788ea6db5357b9ad3e3225b --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e58b5c1cc12ca8948eec3fda0412384a6d0573a6120299292ddfcf96c0340e8 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..752e49b446790a74c3bebb9fc2f2d88734eac994 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745bb2dcd4a95e55380f83a4750f50e280e9004e1f2b9cbc560a81f2f4733102 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ee8dcb8143b623fa11f9e24784781686c41e1ea --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eefe2c9081d9d460c9036dd5d8e9c4b66a27594bfc02c0ca210c6c1db40c6e9c +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7c6013628f8c59328eb90373dbeae96e3433408 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc5acbb1710c239d4c68ccc770c8fab8fff1e7083a6efe558a2e4bf79e66f54 +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7357dcdfe957923385240f680a6e9d65729f8adf --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7035e6a83d8bc4fa18395e2f9aec73cf9da3fd33b6a0736401d6dcfffaaf4d2 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93035d8a609d997650a5c7d8e97962aa0c3719ca --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1fd10e926411de1fde937b1212f9ab71f62331252f783e085f3ad1c728a320 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1da5e70c1ed8e3cf26cc96217e27aee90373e555 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32884eba46891fad436d1095cdcc49a7b66cc821e540e9ef5af1f5bd3388385c +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f28c86821fa3730453c1a6899ecd6d15f1a18d0 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37be534596eb72112084d2e5c9773db26d12d401aed5ef428295e94cc125819 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef2377a4238a8624e94f6c0398ab2a8948c64c13 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6276d672b4c03d33a90c75723138bb5918a4635329cb66376be5ff21c75e6da2 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..62fabf6e77bcceffbdb25c399982cf3936940e69 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ecee0d0601b15a4efa0d56b56d172b8a7576a61f3a342d43681d11b4e0a9903 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4cc405abf8974693f32ea1217bec86ba4bde216 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da1c048cc68eaa6fd976d880905cd040dcd52173bcf1ccfe1534b5b8ce8516f +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e9cc045f6d3c3cc1f1fd4bc118c4d6c90becb17 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d531a60cc49b8e6170aed2a19a79facfacb783feaa2a8a43876e6526477920dd +size 27478114 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfa6e7f562132329d642e260adb1f18b48bf29fc --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a04cfdf67d763a2abcf41fae73d9b8e9c71790e763c4d51dba2fbeda685376 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7f6620a6c0e091f44e3a166f806a5f0d3acd41f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd8ad7dca664790a6c4577701998efa33579aa62eb320ba0d1f5475a6b06af1 +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..443ca2370d51f58e52fae0d5e7ba814155970a97 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690a0d84bbc4825a42f185f327a49634918581d7413f77ab28b8e79cfdbc5d08 +size 27478167 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b594e8bb44c089adcf915d3e9afd0e431d9fc7f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca54820e3f72db672a4e82a86495b8e2dd8f50b0fff14b63d351eacefefcd9d0 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1ed994dfd120abaac83c1753c97edce41ce0d36 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ccef555224f7a666764e7d443595b81d206aa5ce40546f3a653ee745050a841 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..66ed4e0bc98cf4ab9f2c31ea220d96f14f61ff1a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4598de00988a9d48d11081c4ddee32957ff071f0081149c4c34d9ce91fb1daf +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..06b80db7d6a5768e1e55ce9e71be52b54bb1edfc --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f196244509703c16ebf65f58fd9d7bba36003a1f03097704a4959fb01a21d11 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6903b9500e12a725796b2cb95860575e6e796b26 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0814919c246605efd933acc9cff1d4823f980e6211e1bbda66caed66335ea64 +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5280c27ee96b5404185e2865660d368593586af --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad3dd0080aa6589cf8eed79d133f0dc24e34ce9b3308c6a68dfb4b8de4142298 +size 27478050 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b0bb317c0e65b1d9f51574ba8a7a9e8f5517e2a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b251c6451d4a6e4dda261f37f1d43e69dd1e46d1a2b5a370860ca984e29975 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dee64f3af32a77e35b8da524dfd043ad1c988108 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dde5d7dac7d9c5ef9fb6b25c61491091df2c63981148634ea457a2c11896763 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5007746985116c2edaf3be1f5121dba3c0b2a65a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264196ee1b09c77baecacfedcc99a14b4b11c168991e3b605010b151dda9da02 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d3a5e0901850c7c44907ad6b82c1a56858ef081 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f3561a812e4ad0b6757c3d91fb5f83c4a79db7a1a0f3683e3132e696856756 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c68ebe48130cda8bd25ea07a0341f2f16c4679f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc4eb7c3e0a81affe48ad9610840afcba2cb3497e0db17346840ce3a5903113 +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c7c01b2ca3ef93ab11cb27c5097163b604c63fa --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14ebbba2592e52507f39e077c1e62869cac71e26475cb8659ec8a1ad8f32bfb +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e16f4e849bb53f132a4f93ae56a5f1acc97ef5e3 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bb86bd08c2831989dc4974c87fec00985f3ad70df76b1cf117f708470c47c8 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e63c53f3c5b8240e34a16fb7d4742ffe143b7eb4 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232b87983e712614f0d12d39ec8894b7b76edc717192c0f88e33a2959ed0da70 +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b874546525d3fcb7783706884f73610d7bc7e770 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fec1b1ec0dd99ab4356e3a24c730a3f4a315bba1395f8df678b52f7e2719b711 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f02b846e549cef4878c7cef97e5d47acaaf5582a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b59e5a294283e4a1bee38ace8091a233afdb141adb6436c7839a99e5ebf0df0 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..861fe9f76fb0857d3da3846a98c5ea358ebc826b --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd9b12e4bfc3ace9b3793c968d5e1cec5874d115dfb4cef291ffef0a275dd6b +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f4f10ffcbde72fc7acff67c4125e5fea0bb22d0 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee45b21b6565f9bfd8a3c8459aaa35dc82d069e53e9e0c4db79117e83a2a6b0 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f833e7552688db6c51133357caff043caba38d6a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34c450cc89c49cf5d72e4889168a7322a162a477bd1041955b81fb994e181ab9 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdaafedf932e901aba45c005ff534c2273e89a39 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed80c0f7474837541d251edce5e6ceba9fb46b0b3d6dbd5d081ae07e7fb5746 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..276abdd1134126cf89a6516fc0f9aa81ccf4a8bc --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b40e9f50e04f212c42e46f807b4427d86fb3ccc1e128427eb05c78953cc9067 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f62394ef785b11040b05c1fb7be7e560bf4d5219 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e81535f6bc15d67624e7755679d4dd752097c9a3a814d5f783633ade85a77e5 +size 27478167 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..034878b827d72ba4f647ead6d9563e81776e9da9 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d926576245f5a8d12d50f88dcd71389100668f45e8dc4bbfcfb429ab605ef8 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6d9eae3a0f019bd59620cfe0700d7fa14525187 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e792a36ad2c4d4e565b2ececef5859eaa126caf813d7976d0dfcbcbd6ddf0ff0 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fc283f5bc0aeb37a6d7c746f6aeea8479e2b27d --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47dc0a9e33ca60f3df268bce8a4c568717157a3b74cfbe2527da4134eebc1d1 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9834bf6c4d93b05fb27ef5e91d902b8de6284239 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb477d7e92f297711e4238ddd2876cf3a2eec8f05de2f80da35a0c1491e4cd0d +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af0644a987f3f5b043eee8e9a82aa0ecebe4da57 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7790405f3dbdc24db7ee9d4032df83cacc95e498a7f08ecb79cd13a169a926f0 +size 27478359 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f779503c3d37626cef45b0d6f031aad6294c088e --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d5a367b428454755aa7c50eb5055e066f3a3ae1609e0185c8392007bb1e418e +size 27478103 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f88b667159f85fd7644db39778016d6b803f9750 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7043353f92f80f05db9ab1311c18b993d2883e6e3891a0451c0f92c5e5300ab6 +size 27478359 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de872a2457497ebb30723a815581f16c3d2e8d6f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97ed787d7476c8068ea722f9ac9fe6e88ab99ea1b9bda94c500ca9791edac20 +size 27478167 diff --git a/146m14b14b/global_step21553/layer_01-model_00-model_states.pt b/146m14b14b/global_step21553/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..74f6b64094180642a94f2c7484c0c16877a69c0b --- /dev/null +++ b/146m14b14b/global_step21553/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f7da4348a92608b030505676f924147701e385c3eb0f547a612af7623205c3 +size 80413955 diff --git a/146m14b14b/global_step21553/layer_03-model_00-model_states.pt b/146m14b14b/global_step21553/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b0ba13b02f26fbea0a543d5699d014b3ef86daf --- /dev/null +++ b/146m14b14b/global_step21553/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c638589f6b4a90d032f5cdc16f9b2592a7ee21b981e40ba491ca36723010e06f +size 14180099 diff --git a/146m14b14b/global_step21553/layer_04-model_00-model_states.pt b/146m14b14b/global_step21553/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00830bcc8bbfb72f7e722af071748e8abf7248fe --- /dev/null +++ b/146m14b14b/global_step21553/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b29cb8cd8d9c64e76fc483b8aa1a9ea749d12e16a5879fdcb681e17d1b678b +size 14180099 diff --git a/146m14b14b/global_step21553/layer_05-model_00-model_states.pt b/146m14b14b/global_step21553/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bebbe3db427c303b10a1a140a7b5c9d1c92d2c5a --- /dev/null +++ b/146m14b14b/global_step21553/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c24e1e0684998996290bc675fcab4b3bf2ebe2b56c24ee1985d2efb1544ee8 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_06-model_00-model_states.pt b/146m14b14b/global_step21553/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..535aa8afdd674eacc06bda0729780a30d80417f8 --- /dev/null +++ b/146m14b14b/global_step21553/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f6d9400f00f0a5fbc703e2e10283f23642314dc76a6cb3bf762911ffd4e9f4 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_07-model_00-model_states.pt b/146m14b14b/global_step21553/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0e3a21688eb6e6e3acc94f7d997e71c95828d6c --- /dev/null +++ b/146m14b14b/global_step21553/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6809e56c0372cf0529235401ad9a2384f9c56f33139396d327b1b565b608876f +size 14180099 diff --git a/146m14b14b/global_step21553/layer_08-model_00-model_states.pt b/146m14b14b/global_step21553/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c48279431e57e0e0bad3c23c84994570deb2cf9 --- /dev/null +++ b/146m14b14b/global_step21553/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683589bcae5d526c06e1bddf2850b2f34141c69137696d500203c9e60b64dbb3 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_09-model_00-model_states.pt b/146m14b14b/global_step21553/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aa9c2647e802a658933ffd020c96fd169837c7a --- /dev/null +++ b/146m14b14b/global_step21553/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76dd0553c1d4bbe2513bd329b359eb6c35b1b62a9e1405d409488b0219c0e56 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_10-model_00-model_states.pt b/146m14b14b/global_step21553/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2632222976565cce47117a6d9f8c4c2c774d8a4 --- /dev/null +++ b/146m14b14b/global_step21553/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bf7fb68393f1523da6ed98f62a529531cc624ba7fdc1498c9f69af697fb11b +size 14180099 diff --git a/146m14b14b/global_step21553/layer_11-model_00-model_states.pt b/146m14b14b/global_step21553/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d4a1d04665ec8e1eea0f94d89c693812d8b4098 --- /dev/null +++ b/146m14b14b/global_step21553/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da858f4a8dac75157dc46aa1b8e0a988ccda59bc3b7e98b76ac661899a3587b +size 14180099 diff --git a/146m14b14b/global_step21553/layer_12-model_00-model_states.pt b/146m14b14b/global_step21553/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..14bb922e8c75ca0d89bb8959f2b5bc86d2f64052 --- /dev/null +++ b/146m14b14b/global_step21553/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727959a2573a21c2f8f3de1381905c28f9e65bd24bcc604d2151c4dd0faa8620 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_13-model_00-model_states.pt b/146m14b14b/global_step21553/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a84c1d3eb7d6070ce9f90fa88abe953e563d0c4f --- /dev/null +++ b/146m14b14b/global_step21553/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0dc11c722ef845f88fe56fc0474a1b979a724dff4fb354be326b29cc20daf04 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_14-model_00-model_states.pt b/146m14b14b/global_step21553/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3eabae73278422cdeecf11ec32076d16376fb0c2 --- /dev/null +++ b/146m14b14b/global_step21553/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6bd0892f54abb704fb4e1d20c93a942c7fcc13a46c80d96b9e07602fcf1cabc +size 14180099 diff --git a/146m14b14b/global_step21553/layer_15-model_00-model_states.pt b/146m14b14b/global_step21553/layer_15-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e66755d4bd866cfc43cacfa268a7d36f40b15e69 --- /dev/null +++ b/146m14b14b/global_step21553/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6aee284b0073ae4c0c50c178c73882efe14a2933a3460bf6bdfc5caf4cfb33 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_16-model_00-model_states.pt b/146m14b14b/global_step21553/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..238363cca0055226550f6f16f072f17de9f331ad --- /dev/null +++ b/146m14b14b/global_step21553/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8325c877c0973eeb5cd6fc851345117615e8a0c718a1ef0826a1d8da04d6b9b +size 14180099 diff --git a/146m14b14b/global_step21553/layer_17-model_00-model_states.pt b/146m14b14b/global_step21553/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c46d1845b214f8809888800b0f7ebfb4905358e --- /dev/null +++ b/146m14b14b/global_step21553/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b32866b166375e614d573134d847f40dfe1894210c0720ceb162b6f92511c138 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_19-model_00-model_states.pt b/146m14b14b/global_step21553/layer_19-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..48d04ddf7ec67b7a7b2514bf2459eaf566aa46e8 --- /dev/null +++ b/146m14b14b/global_step21553/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd8a82f3f3d5e625a13e8da4007b919eeb6f8ea27030717c6db3e13fbf77a9a +size 4291 diff --git a/146m14b14b/global_step21553/mp_rank_00_model_states.pt b/146m14b14b/global_step21553/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7daad90a7919a1bc07609fd2044fc05436571cc --- /dev/null +++ b/146m14b14b/global_step21553/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ceb781c9c18515304d6086d362f2fc281a51775287b5daefeda2df2b5bee0f +size 35123 diff --git a/146m14b14b/logs/2072536.err b/146m14b14b/logs/2072536.err new file mode 100644 index 0000000000000000000000000000000000000000..0c207509bcb7b657cc6a9faff1c680febc669cff --- /dev/null +++ b/146m14b14b/logs/2072536.err @@ -0,0 +1,484 @@ +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +5: Successfully preprocessed all matching files. +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +7: Building extension module utils... +7: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +4: +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module utils... +3: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +0: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +5: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +5: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils...Loading extension module utils... +7: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils...Loading extension module utils... +3: +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils...Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: +3: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: +0: Loading extension module utils... +0: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +0: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils...Loading extension module utils... +2: +6: Loading extension module utils... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: +2: Loading extension module utils...Loading extension module utils... +2: Loading extension module utils... +2: +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/146m14b14b/logs/2072536.out b/146m14b14b/logs/2072536.out new file mode 100644 index 0000000000000000000000000000000000000000..2516d1d7ed5af8993087dabed56114d770423b3c --- /dev/null +++ b/146m14b14b/logs/2072536.out @@ -0,0 +1,9657 @@ +Model parameters: d_model 768 ffw_size 3072 kv_size 64 n_heads 12 n_layers 15 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 15 --hidden-size 768 --num-attention-heads 12 --kv-channels 64 --ffn-hidden-size 3072 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 5_517_578 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-146m --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 5_517_578 --lr-warmup-samples 55_176 --clip-grad 1.0 --weight-decay 1e-1 --log-interval 10 --save-interval 1000 --eval-interval 1000 --eval-iters 1 --tensorboard-dir tensorboard_146m --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_146m --load checkpoints_146m --data-path /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document --data-impl mmap --split 949,50,1 --deepspeed --deepspeed_config ds_configs/2072536.json --zero-stage 0 +START 2072536: Fri Nov 25 17:29:47 EET 2022 +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 45.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 48.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 38.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 46.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 37.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 43.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 42.0c 100.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 41.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 43.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 45.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 43.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 44.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 48.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 37.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 48.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 42.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 43.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 45.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 40.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +7: Launching on nid005079 (7/8), master nid005072 port 9999, GPUs 8, CUDA: True +6: Launching on nid005078 (6/8), master nid005072 port 9999, GPUs 8, CUDA: True +0: Launching on nid005072 (0/8), master nid005072 port 9999, GPUs 8, CUDA: True +3: Launching on nid005075 (3/8), master nid005072 port 9999, GPUs 8, CUDA: True +2: Launching on nid005074 (2/8), master nid005072 port 9999, GPUs 8, CUDA: True +4: Launching on nid005076 (4/8), master nid005072 port 9999, GPUs 8, CUDA: True +5: Launching on nid005077 (5/8), master nid005072 port 9999, GPUs 8, CUDA: True +1: Launching on nid005073 (1/8), master nid005072 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... ['/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document'] +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2072536.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1000 +0: eval_iters ...................................... 1 +0: eval_only ....................................... None +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 3072 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 768 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-146m +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_146m +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 5517578 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 55176 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... None +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 12 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 15 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... False +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_146m +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... 949,50,1 +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_146m +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_names ....................... None +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: test_weighted_split_splits ...................... None +0: test_weighted_split_weights ..................... None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 5517578 +0: train_tokens .................................... None +0: train_weighted_split_paths ...................... None +0: train_weighted_split_paths_path ................. None +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... None +0: valid_weighted_split_paths ...................... None +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... None +0: valid_weighted_split_weights .................... None +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2022-11-25 17:30:49,877] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.091 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.cuda.o scaled_upper_triang_masked_softmax_hip.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: [1/1] c++ layer_norm_hip_kernel.cuda.o layer_norm_cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so +0: >>> done with compiling and loading fused kernels. Compilation time: 21.454 seconds +0: time to initialize megatron (seconds): 69.261 +0: [after megatron is initialized] datetime: 2022-11-25 17:31:17 +0: building GPT model ... +0: [2022-11-25 17:31:17,315] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2022-11-25 17:31:17,315] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2022-11-25 17:31:17,315] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.14 GB, percent = 5.8% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2022-11-25 17:31:19,530] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=22 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: undo +0: 19: MixedFusedLayerNorm +0: 20: EmbeddingPipe +0: 21: float16_to_fp32 +0: loss: CrossEntropy +0: [2022-11-25 17:31:19,842] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2022-11-25 17:31:19,843] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.29 GB Max_CA 0 GB +0: [2022-11-25 17:31:19,843] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.16 GB, percent = 5.8% +0: setting training iterations to 21553 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2022-11-25 17:31:19,845] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2022-11-25 17:31:32,893] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2022-11-25 17:31:32,894] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2022-11-25 17:31:32,894] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2022-11-25 17:31:32,898] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2022-11-25 17:31:32,898] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2022-11-25 17:31:32,940] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2022-11-25 17:31:32,941] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.29 GB CA 0.31 GB Max_CA 0 GB +0: [2022-11-25 17:31:32,941] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.82 GB, percent = 5.9% +7: ninja: no work to do. +7: Time to load utils op: 0.22454023361206055 seconds +0: ninja: no work to do. +0: Time to load utils op: 0.12721824645996094 seconds +3: Time to load utils op: 0.3108365535736084 seconds +0: Time to load utils op: 0.20401978492736816 secondsTime to load utils op: 0.20418214797973633 seconds +0: +0: Time to load utils op: 0.20402932167053223 seconds +0: Time to load utils op: 0.20391607284545898 seconds +0: Time to load utils op: 0.20438313484191895 seconds +0: Time to load utils op: 0.20429658889770508 seconds +3: Time to load utils op: 0.20341730117797852 seconds +3: Time to load utils op: 0.20348358154296875 seconds +3: Time to load utils op: 0.20430707931518555 seconds +3: Time to load utils op: 0.2043137550354004 seconds +3: Time to load utils op: 0.20405030250549316 seconds +3: Time to load utils op: 0.205003023147583 seconds +3: Time to load utils op: 0.2050163745880127 seconds +0: Time to load utils op: 0.3040347099304199 seconds +7: Time to load utils op: 0.20331907272338867 seconds +7: Time to load utils op: 0.20355963706970215 seconds +7: Time to load utils op: 0.20377683639526367 seconds +7: Time to load utils op: 0.20396137237548828 seconds +7: Time to load utils op: 0.20304203033447266 seconds +7: Time to load utils op: 0.20418548583984375 seconds +7: Time to load utils op: 0.20418381690979004 seconds +5: Time to load utils op: 0.21235156059265137 seconds +5: Time to load utils op: 0.2133638858795166 seconds +5: Time to load utils op: 0.21322965621948242 seconds +5: Time to load utils op: 0.21224355697631836 seconds +5: Time to load utils op: 0.2130880355834961 seconds +5: Time to load utils op: 0.21321868896484375 secondsTime to load utils op: 0.21219229698181152 seconds +5: +5: Time to load utils op: 0.21358418464660645 seconds +2: Time to load utils op: 0.21208596229553223 secondsTime to load utils op: 0.21208953857421875 seconds +2: +1: Time to load utils op: 0.20953917503356934 secondsTime to load utils op: 0.21245121955871582 seconds +1: +1: Time to load utils op: 0.20949721336364746 secondsTime to load utils op: 0.2124629020690918 secondsTime to load utils op: 0.21246743202209473 seconds +1: +1: +2: Time to load utils op: 0.21212363243103027 seconds +1: Time to load utils op: 0.2095632553100586 seconds +2: Time to load utils op: 0.212111234664917 secondsTime to load utils op: 0.2121427059173584 secondsTime to load utils op: 0.21210384368896484 secondsTime to load utils op: 0.21211695671081543 seconds +1: Time to load utils op: 0.20984721183776855 seconds +2: +2: +2: +2: Time to load utils op: 0.21212482452392578 seconds +1: Time to load utils op: 0.20951104164123535 seconds +4: Time to load utils op: 0.2107067108154297 seconds +4: Time to load utils op: 0.21071672439575195 secondsTime to load utils op: 0.21073007583618164 seconds +4: +4: Time to load utils op: 0.21072912216186523 seconds +4: Time to load utils op: 0.2107398509979248 secondsTime to load utils op: 0.2106938362121582 seconds +4: +4: Time to load utils op: 0.21076130867004395 seconds +4: Time to load utils op: 0.21075963973999023 seconds +6: Time to load utils op: 0.21063852310180664 seconds +6: Time to load utils op: 0.21067333221435547 secondsTime to load utils op: 0.2106795310974121 secondsTime to load utils op: 0.2106618881225586 seconds +6: +6: +6: Time to load utils op: 0.2106800079345703 seconds +6: Time to load utils op: 0.21065521240234375 seconds +6: Time to load utils op: 0.2106935977935791 seconds +6: Time to load utils op: 0.21071124076843262 seconds +0: [2022-11-25 17:31:33,281] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2022-11-25 17:31:33,281] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.31 GB Max_CA 0 GB +0: [2022-11-25 17:31:33,281] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.82 GB, percent = 5.9% +4: Time to load utils op: 0.0011153221130371094 seconds +4: Time to load utils op: 0.0011026859283447266 seconds +4: Time to load utils op: 0.0012357234954833984 seconds +4: Time to load utils op: 0.0013074874877929688 seconds +4: Time to load utils op: 0.0013608932495117188 seconds +4: Time to load utils op: 0.0013332366943359375 seconds +4: Time to load utils op: 0.0013802051544189453 seconds +4: Time to load utils op: 0.0013701915740966797 seconds +5: Time to load utils op: 0.0007240772247314453 seconds +5: Time to load utils op: 0.0009176731109619141 seconds +5: Time to load utils op: 0.0008957386016845703 seconds +5: Time to load utils op: 0.001009225845336914 secondsTime to load utils op: 0.001018524169921875 seconds +5: +5: Time to load utils op: 0.0010840892791748047 seconds +7: Time to load utils op: 0.0004947185516357422 seconds +5: Time to load utils op: 0.001008749008178711 seconds +5: Time to load utils op: 0.001096963882446289 seconds +7: Time to load utils op: 0.0005450248718261719 secondsTime to load utils op: 0.0005037784576416016 seconds +7: Time to load utils op: 0.0005598068237304688 seconds +7: +7: Time to load utils op: 0.0005767345428466797 seconds +7: Time to load utils op: 0.000537872314453125 seconds +7: Time to load utils op: 0.00047779083251953125 seconds +7: Time to load utils op: 0.0005440711975097656 seconds +3: Time to load utils op: 0.0004966259002685547 seconds +3: Time to load utils op: 0.0003371238708496094 seconds +3: Time to load utils op: 0.0005090236663818359 seconds +3: Time to load utils op: 0.0005834102630615234 secondsTime to load utils op: 0.0005717277526855469 seconds +3: +3: Time to load utils op: 0.0006680488586425781 secondsTime to load utils op: 0.0006232261657714844 seconds +3: +3: Time to load utils op: 0.0006413459777832031 seconds +1: Time to load utils op: 0.0005042552947998047 seconds +1: Time to load utils op: 0.0003457069396972656 seconds +1: Time to load utils op: 0.0005695819854736328 seconds +1: Time to load utils op: 0.0004100799560546875 seconds +0: Time to load utils op: 0.0005743503570556641 secondsTime to load utils op: 0.0005550384521484375 seconds +0: +0: Time to load utils op: 0.00052642822265625 seconds +1: Time to load utils op: 0.0003871917724609375 seconds +0: Time to load utils op: 0.0006182193756103516 seconds +0: Time to load utils op: 0.0004897117614746094 seconds +1: Time to load utils op: 0.00039005279541015625 seconds +0: Time to load utils op: 0.0006842613220214844 seconds +0: Time to load utils op: 0.000579833984375 seconds +6: Time to load utils op: 0.0006992816925048828 seconds +1: Time to load utils op: 0.00041604042053222656 seconds +1: Time to load utils op: 0.0005102157592773438 seconds +6: Time to load utils op: 0.0013186931610107422 seconds +2: Time to load utils op: 0.0010666847229003906 seconds +0: [2022-11-25 17:31:33,468] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +6: Time to load utils op: 0.0013072490692138672 seconds +2: Time to load utils op: 0.0012025833129882812 seconds +6: Time to load utils op: 0.001360177993774414 seconds +6: Time to load utils op: 0.0013630390167236328 seconds +6: Time to load utils op: 0.0014715194702148438 secondsTime to load utils op: 0.0013706684112548828 seconds +2: Time to load utils op: 0.0012018680572509766 seconds +6: +2: Time to load utils op: 0.0012028217315673828 secondsTime to load utils op: 0.0011646747589111328 secondsTime to load utils op: 0.0011796951293945312 seconds +2: +2: +6: Time to load utils op: 0.0014314651489257812 seconds +2: Time to load utils op: 0.001214742660522461 seconds +2: Time to load utils op: 0.0012989044189453125 seconds +0: [2022-11-25 17:31:33,469] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,469] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.83 GB, percent = 5.9% +0: [2022-11-25 17:31:33,509] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2022-11-25 17:31:33,510] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,510] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,542] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2022-11-25 17:31:33,543] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,543] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,573] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2022-11-25 17:31:33,574] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,574] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,607] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2022-11-25 17:31:33,607] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,608] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,638] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2022-11-25 17:31:33,638] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,639] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,674] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2022-11-25 17:31:33,675] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,675] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,705] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2022-11-25 17:31:33,706] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2022-11-25 17:31:33,706] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 29.97 GB, percent = 6.0% +0: [2022-11-25 17:31:33,706] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2022-11-25 17:31:33,706] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2022-11-25 17:31:33,706] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2022-11-25 17:31:33,706] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0, 0.0], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] amp_params ................... False +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] comms_config ................. +0: [2022-11-25 17:31:33,707] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] dump_state ................... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ bert.encoder.layer +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] monitor_config ............... +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] pld_params ................... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2022-11-25 17:31:33,708] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] wall_clock_breakdown ......... False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2022-11-25 17:31:33,709] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2022-11-25 17:31:33,709] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004062652587890625 seconds +0: [2022-11-25 17:31:33,709] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2022-11-25 17:31:33,719] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=22 [0, 22) STAGE_PARAMS=146525952 (146.526M) TOTAL_PARAMS=146525952 (146.526M) UNIQUE_PARAMS=146525952 (146.526M) +4: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: WARNING: could not find the metadata file checkpoints_146m +0: will not load any checkpoints and will start from random +3: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,725] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2022-11-25 17:31:33,726] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: time (ms) | load-checkpoint: 5.73 +0: estimated model parameters: 0.146525952 +0: estimated model parameters without embeddings: 0.106319616 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2022-11-25 17:31:34 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 5517578 +0: validation: 5632 +0: test: 256 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.024323 seconds +0: number of documents: 210604984 +0: > dataset split: +0: train: +0: document indices in [0, 199864130) total of 199864130 documents +0: validation: +0: document indices in [199864130, 210394379) total of 10530249 documents +0: test: +0: document indices in [210394379, 210604984) total of 210605 documents +0: > WARNING: could not find index map files, building the indices on rank 0 ... +0: > only one epoch required, setting separate_last_epoch to False +0: > elasped time to build and save doc-idx mapping (seconds): 14.655996 +0: using: +0: number of documents: 199864130 +0: number of epochs: 1 +0: sequence length: 2048 +0: total number of samples: 173377816 +0: > elasped time to build and save sample-idx mapping (seconds): 4.187417 +0: > building shuffle index with split [0, 173377816) and [173377816, 173377816) ... +0: > elasped time to build and save shuffle-idx mapping (seconds): 10.345480 +0: > loading doc-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_train_indexmap_5517578ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_train_indexmap_5517578ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_train_indexmap_5517578ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.172 seconds +0: total number of samples: 173377817 +0: total number of epochs: 1 +0: > WARNING: could not find index map files, building the indices on rank 0 ... +0: > only one epoch required, setting separate_last_epoch to False +0: > elasped time to build and save doc-idx mapping (seconds): 0.516110 +0: using: +0: number of documents: 10530249 +0: number of epochs: 1 +0: sequence length: 2048 +0: total number of samples: 9118344 +0: > elasped time to build and save sample-idx mapping (seconds): 0.206488 +0: > building shuffle index with split [0, 9118344) and [9118344, 9118344) ... +0: > elasped time to build and save shuffle-idx mapping (seconds): 0.273196 +0: > loading doc-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_valid_indexmap_5632ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_valid_indexmap_5632ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_valid_indexmap_5632ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.070 seconds +0: total number of samples: 9118345 +0: total number of epochs: 1 +0: > loading doc-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_test_indexmap_256ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_test_indexmap_256ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document_test_indexmap_256ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.078 seconds +0: total number of samples: 182928 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2022-11-25 17:32:20 +0: done with setup ... +0: training ... +0: Number of parameters: [tensor rank - pipeline rank] w/ and w/o embeddings: +7: time (ms) | model-and-optimizer-setup: 17176.59 | train/valid/test-data-iterators-setup: 46228.34 +0: [000-000] 0.1465B / 0.1063B +0: [before the start of training step] datetime: 2022-11-25 17:32:20 +0: [Rank 0] (after 10 iterations) memory (MB) | allocated: 2731.02197265625 | max allocated: 22586.583984375 | reserved: 23360.0 | max reserved: 23360.0 +7: iteration 10/ 21553 | consumed samples: 2560 | consumed tokens: 5242880 | elapsed time per iteration (s): 1.27 | learning rate: 9.279E-06 | global batch size: 256 | lm loss: 1.035255E+01 | grad norm: 6.121 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 200.919 | TFLOPs: 7.03 | +7: iteration 20/ 21553 | consumed samples: 5120 | consumed tokens: 10485760 | elapsed time per iteration (s): 0.32 | learning rate: 1.856E-05 | global batch size: 256 | lm loss: 9.227745E+00 | grad norm: 2.011 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.510 | TFLOPs: 28.02 | +7: iteration 30/ 21553 | consumed samples: 7680 | consumed tokens: 15728640 | elapsed time per iteration (s): 0.32 | learning rate: 2.784E-05 | global batch size: 256 | lm loss: 8.901884E+00 | grad norm: 1.474 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.236 | TFLOPs: 28.43 | +7: iteration 40/ 21553 | consumed samples: 10240 | consumed tokens: 20971520 | elapsed time per iteration (s): 0.31 | learning rate: 3.712E-05 | global batch size: 256 | lm loss: 8.559880E+00 | grad norm: 1.679 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.669 | TFLOPs: 28.59 | +7: iteration 50/ 21553 | consumed samples: 12800 | consumed tokens: 26214400 | elapsed time per iteration (s): 0.31 | learning rate: 4.640E-05 | global batch size: 256 | lm loss: 8.165595E+00 | grad norm: 1.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.148 | TFLOPs: 28.85 | +7: iteration 60/ 21553 | consumed samples: 15360 | consumed tokens: 31457280 | elapsed time per iteration (s): 0.34 | learning rate: 5.568E-05 | global batch size: 256 | lm loss: 7.822748E+00 | grad norm: 1.072 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 761.591 | TFLOPs: 26.66 | +7: iteration 70/ 21553 | consumed samples: 17920 | consumed tokens: 36700160 | elapsed time per iteration (s): 0.31 | learning rate: 6.496E-05 | global batch size: 256 | lm loss: 7.460661E+00 | grad norm: 1.645 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.922 | TFLOPs: 28.88 | +7: iteration 80/ 21553 | consumed samples: 20480 | consumed tokens: 41943040 | elapsed time per iteration (s): 0.33 | learning rate: 7.424E-05 | global batch size: 256 | lm loss: 7.127561E+00 | grad norm: 0.978 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 786.576 | TFLOPs: 27.54 | +7: iteration 90/ 21553 | consumed samples: 23040 | consumed tokens: 47185920 | elapsed time per iteration (s): 0.31 | learning rate: 8.351E-05 | global batch size: 256 | lm loss: 6.908787E+00 | grad norm: 1.168 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.617 | TFLOPs: 28.90 | +7: iteration 100/ 21553 | consumed samples: 25600 | consumed tokens: 52428800 | elapsed time per iteration (s): 0.31 | learning rate: 9.279E-05 | global batch size: 256 | lm loss: 6.696878E+00 | grad norm: 1.610 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.205 | TFLOPs: 29.27 | +7: iteration 110/ 21553 | consumed samples: 28160 | consumed tokens: 57671680 | elapsed time per iteration (s): 0.32 | learning rate: 1.021E-04 | global batch size: 256 | lm loss: 6.535680E+00 | grad norm: 1.225 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.930 | TFLOPs: 28.07 | +7: iteration 120/ 21553 | consumed samples: 30720 | consumed tokens: 62914560 | elapsed time per iteration (s): 0.32 | learning rate: 1.114E-04 | global batch size: 256 | lm loss: 6.406283E+00 | grad norm: 1.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.007 | TFLOPs: 28.01 | +7: iteration 130/ 21553 | consumed samples: 33280 | consumed tokens: 68157440 | elapsed time per iteration (s): 0.31 | learning rate: 1.206E-04 | global batch size: 256 | lm loss: 6.260789E+00 | grad norm: 1.053 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.765 | TFLOPs: 28.59 | +7: iteration 140/ 21553 | consumed samples: 35840 | consumed tokens: 73400320 | elapsed time per iteration (s): 0.32 | learning rate: 1.299E-04 | global batch size: 256 | lm loss: 6.166949E+00 | grad norm: 1.141 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.009 | TFLOPs: 28.15 | +7: iteration 150/ 21553 | consumed samples: 38400 | consumed tokens: 78643200 | elapsed time per iteration (s): 0.32 | learning rate: 1.392E-04 | global batch size: 256 | lm loss: 6.097012E+00 | grad norm: 1.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 791.995 | TFLOPs: 27.73 | +7: iteration 160/ 21553 | consumed samples: 40960 | consumed tokens: 83886080 | elapsed time per iteration (s): 0.33 | learning rate: 1.485E-04 | global batch size: 256 | lm loss: 5.956919E+00 | grad norm: 1.008 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 785.716 | TFLOPs: 27.51 | +7: iteration 170/ 21553 | consumed samples: 43520 | consumed tokens: 89128960 | elapsed time per iteration (s): 0.31 | learning rate: 1.577E-04 | global batch size: 256 | lm loss: 5.923164E+00 | grad norm: 1.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.138 | TFLOPs: 28.78 | +7: iteration 180/ 21553 | consumed samples: 46080 | consumed tokens: 94371840 | elapsed time per iteration (s): 0.32 | learning rate: 1.670E-04 | global batch size: 256 | lm loss: 5.805059E+00 | grad norm: 1.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.898 | TFLOPs: 28.07 | +7: iteration 190/ 21553 | consumed samples: 48640 | consumed tokens: 99614720 | elapsed time per iteration (s): 0.32 | learning rate: 1.763E-04 | global batch size: 256 | lm loss: 5.761803E+00 | grad norm: 1.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.595 | TFLOPs: 27.64 | +7: iteration 200/ 21553 | consumed samples: 51200 | consumed tokens: 104857600 | elapsed time per iteration (s): 0.31 | learning rate: 1.856E-04 | global batch size: 256 | lm loss: 5.687910E+00 | grad norm: 1.545 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.284 | TFLOPs: 28.61 | +7: iteration 210/ 21553 | consumed samples: 53760 | consumed tokens: 110100480 | elapsed time per iteration (s): 0.31 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 5.650314E+00 | grad norm: 1.193 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.545 | TFLOPs: 28.80 | +7: iteration 220/ 21553 | consumed samples: 56320 | consumed tokens: 115343360 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.571930E+00 | grad norm: 1.189 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.643 | TFLOPs: 27.99 | +7: iteration 230/ 21553 | consumed samples: 58880 | consumed tokens: 120586240 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.538058E+00 | grad norm: 0.916 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.443 | TFLOPs: 28.58 | +7: iteration 240/ 21553 | consumed samples: 61440 | consumed tokens: 125829120 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.508624E+00 | grad norm: 0.726 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.550 | TFLOPs: 29.25 | +7: iteration 250/ 21553 | consumed samples: 64000 | consumed tokens: 131072000 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.429354E+00 | grad norm: 1.049 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.318 | TFLOPs: 28.05 | +7: iteration 260/ 21553 | consumed samples: 66560 | consumed tokens: 136314880 | elapsed time per iteration (s): 0.33 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.409290E+00 | grad norm: 0.815 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 772.652 | TFLOPs: 27.05 | +7: iteration 270/ 21553 | consumed samples: 69120 | consumed tokens: 141557760 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.332355E+00 | grad norm: 0.862 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.458 | TFLOPs: 28.48 | +7: iteration 280/ 21553 | consumed samples: 71680 | consumed tokens: 146800640 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.317310E+00 | grad norm: 0.733 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 792.342 | TFLOPs: 27.74 | +7: iteration 290/ 21553 | consumed samples: 74240 | consumed tokens: 152043520 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.262907E+00 | grad norm: 1.262 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.510 | TFLOPs: 28.48 | +7: iteration 300/ 21553 | consumed samples: 76800 | consumed tokens: 157286400 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.185440E+00 | grad norm: 0.676 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.729 | TFLOPs: 28.77 | +7: iteration 310/ 21553 | consumed samples: 79360 | consumed tokens: 162529280 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.204135E+00 | grad norm: 0.562 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.918 | TFLOPs: 28.28 | +7: iteration 320/ 21553 | consumed samples: 81920 | consumed tokens: 167772160 | elapsed time per iteration (s): 0.33 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.176025E+00 | grad norm: 0.946 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 768.694 | TFLOPs: 26.91 | +7: iteration 330/ 21553 | consumed samples: 84480 | consumed tokens: 173015040 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.149445E+00 | grad norm: 0.890 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.225 | TFLOPs: 28.15 | +7: iteration 340/ 21553 | consumed samples: 87040 | consumed tokens: 178257920 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.115939E+00 | grad norm: 0.735 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.520 | TFLOPs: 29.39 | +7: iteration 350/ 21553 | consumed samples: 89600 | consumed tokens: 183500800 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.059958E+00 | grad norm: 0.714 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.950 | TFLOPs: 28.70 | +7: iteration 360/ 21553 | consumed samples: 92160 | consumed tokens: 188743680 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.025798E+00 | grad norm: 0.853 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 796.451 | TFLOPs: 27.88 | +7: iteration 370/ 21553 | consumed samples: 94720 | consumed tokens: 193986560 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.976713E+00 | grad norm: 0.691 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.473 | TFLOPs: 28.72 | +7: iteration 380/ 21553 | consumed samples: 97280 | consumed tokens: 199229440 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.971929E+00 | grad norm: 0.735 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.713 | TFLOPs: 28.73 | +7: iteration 390/ 21553 | consumed samples: 99840 | consumed tokens: 204472320 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.927294E+00 | grad norm: 0.910 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.569 | TFLOPs: 27.99 | +7: iteration 400/ 21553 | consumed samples: 102400 | consumed tokens: 209715200 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.946591E+00 | grad norm: 0.957 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.683 | TFLOPs: 28.52 | +7: iteration 410/ 21553 | consumed samples: 104960 | consumed tokens: 214958080 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.901430E+00 | grad norm: 0.801 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.276 | TFLOPs: 28.93 | +7: iteration 420/ 21553 | consumed samples: 107520 | consumed tokens: 220200960 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.927833E+00 | grad norm: 0.789 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 802.910 | TFLOPs: 28.11 | +7: iteration 430/ 21553 | consumed samples: 110080 | consumed tokens: 225443840 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.864362E+00 | grad norm: 0.847 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.065 | TFLOPs: 28.78 | +7: iteration 440/ 21553 | consumed samples: 112640 | consumed tokens: 230686720 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 4.817537E+00 | grad norm: 0.776 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 792.765 | TFLOPs: 27.75 | +7: iteration 450/ 21553 | consumed samples: 115200 | consumed tokens: 235929600 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.790571E+00 | grad norm: 0.842 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.727 | TFLOPs: 28.84 | +7: iteration 460/ 21553 | consumed samples: 117760 | consumed tokens: 241172480 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.799353E+00 | grad norm: 1.025 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.087 | TFLOPs: 28.53 | +7: iteration 470/ 21553 | consumed samples: 120320 | consumed tokens: 246415360 | elapsed time per iteration (s): 0.32 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.776627E+00 | grad norm: 1.140 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.853 | TFLOPs: 28.28 | +7: iteration 480/ 21553 | consumed samples: 122880 | consumed tokens: 251658240 | elapsed time per iteration (s): 0.32 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.752605E+00 | grad norm: 0.686 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.298 | TFLOPs: 28.02 | +7: iteration 490/ 21553 | consumed samples: 125440 | consumed tokens: 256901120 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.696078E+00 | grad norm: 0.907 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.187 | TFLOPs: 28.68 | +7: iteration 500/ 21553 | consumed samples: 128000 | consumed tokens: 262144000 | elapsed time per iteration (s): 0.32 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.688320E+00 | grad norm: 0.734 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 790.347 | TFLOPs: 27.67 | +7: iteration 510/ 21553 | consumed samples: 130560 | consumed tokens: 267386880 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.665505E+00 | grad norm: 0.915 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.419 | TFLOPs: 28.79 | +7: iteration 520/ 21553 | consumed samples: 133120 | consumed tokens: 272629760 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.636910E+00 | grad norm: 1.127 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.265 | TFLOPs: 28.86 | +7: iteration 530/ 21553 | consumed samples: 135680 | consumed tokens: 277872640 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.577019E+00 | grad norm: 0.978 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.320 | TFLOPs: 28.75 | +7: iteration 540/ 21553 | consumed samples: 138240 | consumed tokens: 283115520 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.630569E+00 | grad norm: 0.992 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.542 | TFLOPs: 28.55 | +7: iteration 550/ 21553 | consumed samples: 140800 | consumed tokens: 288358400 | elapsed time per iteration (s): 0.32 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.613895E+00 | grad norm: 0.794 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 806.052 | TFLOPs: 28.22 | +7: iteration 560/ 21553 | consumed samples: 143360 | consumed tokens: 293601280 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.605127E+00 | grad norm: 0.956 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.025 | TFLOPs: 29.09 | +7: iteration 570/ 21553 | consumed samples: 145920 | consumed tokens: 298844160 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.551279E+00 | grad norm: 0.928 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.073 | TFLOPs: 28.57 | +7: iteration 580/ 21553 | consumed samples: 148480 | consumed tokens: 304087040 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.508670E+00 | grad norm: 0.808 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.345 | TFLOPs: 29.59 | +7: iteration 590/ 21553 | consumed samples: 151040 | consumed tokens: 309329920 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.454187E+00 | grad norm: 0.907 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.617 | TFLOPs: 29.32 | +7: iteration 600/ 21553 | consumed samples: 153600 | consumed tokens: 314572800 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 4.442399E+00 | grad norm: 0.901 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.843 | TFLOPs: 28.84 | +7: iteration 610/ 21553 | consumed samples: 156160 | consumed tokens: 319815680 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.432011E+00 | grad norm: 0.978 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.046 | TFLOPs: 28.50 | +7: iteration 620/ 21553 | consumed samples: 158720 | consumed tokens: 325058560 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.363071E+00 | grad norm: 1.142 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.425 | TFLOPs: 28.76 | +7: iteration 630/ 21553 | consumed samples: 161280 | consumed tokens: 330301440 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.423969E+00 | grad norm: 1.050 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.245 | TFLOPs: 29.17 | +7: iteration 640/ 21553 | consumed samples: 163840 | consumed tokens: 335544320 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.322603E+00 | grad norm: 1.074 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.053 | TFLOPs: 28.74 | +7: iteration 650/ 21553 | consumed samples: 166400 | consumed tokens: 340787200 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.323783E+00 | grad norm: 1.122 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.630 | TFLOPs: 29.25 | +7: iteration 660/ 21553 | consumed samples: 168960 | consumed tokens: 346030080 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.253331E+00 | grad norm: 0.952 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.523 | TFLOPs: 28.90 | +7: iteration 670/ 21553 | consumed samples: 171520 | consumed tokens: 351272960 | elapsed time per iteration (s): 0.32 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.187371E+00 | grad norm: 1.151 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.358 | TFLOPs: 28.37 | +7: iteration 680/ 21553 | consumed samples: 174080 | consumed tokens: 356515840 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.174537E+00 | grad norm: 1.108 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.269 | TFLOPs: 28.96 | +7: iteration 690/ 21553 | consumed samples: 176640 | consumed tokens: 361758720 | elapsed time per iteration (s): 0.32 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.153539E+00 | grad norm: 0.941 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.000 | TFLOPs: 28.01 | +7: iteration 700/ 21553 | consumed samples: 179200 | consumed tokens: 367001600 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.113493E+00 | grad norm: 0.873 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.592 | TFLOPs: 28.66 | +7: iteration 710/ 21553 | consumed samples: 181760 | consumed tokens: 372244480 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.080475E+00 | grad norm: 0.998 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.411 | TFLOPs: 28.93 | +7: iteration 720/ 21553 | consumed samples: 184320 | consumed tokens: 377487360 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 4.088032E+00 | grad norm: 1.149 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.405 | TFLOPs: 29.39 | +7: iteration 730/ 21553 | consumed samples: 186880 | consumed tokens: 382730240 | elapsed time per iteration (s): 0.33 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 4.043604E+00 | grad norm: 0.981 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 787.604 | TFLOPs: 27.57 | +7: iteration 740/ 21553 | consumed samples: 189440 | consumed tokens: 387973120 | elapsed time per iteration (s): 0.32 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.988627E+00 | grad norm: 0.995 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.061 | TFLOPs: 28.43 | +7: iteration 750/ 21553 | consumed samples: 192000 | consumed tokens: 393216000 | elapsed time per iteration (s): 0.32 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.939196E+00 | grad norm: 1.059 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 806.912 | TFLOPs: 28.25 | +7: iteration 760/ 21553 | consumed samples: 194560 | consumed tokens: 398458880 | elapsed time per iteration (s): 0.31 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.966461E+00 | grad norm: 0.943 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.054 | TFLOPs: 28.71 | +7: iteration 770/ 21553 | consumed samples: 197120 | consumed tokens: 403701760 | elapsed time per iteration (s): 0.32 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.962626E+00 | grad norm: 0.985 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.916 | TFLOPs: 28.28 | +7: iteration 780/ 21553 | consumed samples: 199680 | consumed tokens: 408944640 | elapsed time per iteration (s): 0.31 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.885042E+00 | grad norm: 0.807 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.714 | TFLOPs: 28.98 | +7: iteration 790/ 21553 | consumed samples: 202240 | consumed tokens: 414187520 | elapsed time per iteration (s): 0.31 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.858479E+00 | grad norm: 0.842 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.749 | TFLOPs: 29.26 | +7: iteration 800/ 21553 | consumed samples: 204800 | consumed tokens: 419430400 | elapsed time per iteration (s): 0.32 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.831419E+00 | grad norm: 0.886 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.257 | TFLOPs: 28.05 | +7: iteration 810/ 21553 | consumed samples: 207360 | consumed tokens: 424673280 | elapsed time per iteration (s): 0.32 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 3.800061E+00 | grad norm: 0.814 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.064 | TFLOPs: 28.39 | +7: iteration 820/ 21553 | consumed samples: 209920 | consumed tokens: 429916160 | elapsed time per iteration (s): 0.31 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.815657E+00 | grad norm: 1.032 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.894 | TFLOPs: 29.26 | +7: iteration 830/ 21553 | consumed samples: 212480 | consumed tokens: 435159040 | elapsed time per iteration (s): 0.32 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.825686E+00 | grad norm: 0.667 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 788.494 | TFLOPs: 27.60 | +7: iteration 840/ 21553 | consumed samples: 215040 | consumed tokens: 440401920 | elapsed time per iteration (s): 0.32 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.745876E+00 | grad norm: 0.881 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 794.354 | TFLOPs: 27.81 | +7: iteration 850/ 21553 | consumed samples: 217600 | consumed tokens: 445644800 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.742579E+00 | grad norm: 0.847 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.310 | TFLOPs: 29.63 | +7: iteration 860/ 21553 | consumed samples: 220160 | consumed tokens: 450887680 | elapsed time per iteration (s): 0.32 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.726619E+00 | grad norm: 0.877 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.604 | TFLOPs: 28.45 | +7: iteration 870/ 21553 | consumed samples: 222720 | consumed tokens: 456130560 | elapsed time per iteration (s): 0.31 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.746058E+00 | grad norm: 0.867 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.091 | TFLOPs: 28.46 | +7: iteration 880/ 21553 | consumed samples: 225280 | consumed tokens: 461373440 | elapsed time per iteration (s): 0.31 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.741306E+00 | grad norm: 0.802 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.031 | TFLOPs: 28.60 | +7: iteration 890/ 21553 | consumed samples: 227840 | consumed tokens: 466616320 | elapsed time per iteration (s): 0.31 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 3.692436E+00 | grad norm: 0.669 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.351 | TFLOPs: 28.68 | +7: iteration 900/ 21553 | consumed samples: 230400 | consumed tokens: 471859200 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.666847E+00 | grad norm: 0.728 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.310 | TFLOPs: 28.68 | +7: iteration 910/ 21553 | consumed samples: 232960 | consumed tokens: 477102080 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.686845E+00 | grad norm: 0.684 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.622 | TFLOPs: 29.18 | +7: iteration 920/ 21553 | consumed samples: 235520 | consumed tokens: 482344960 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.693575E+00 | grad norm: 0.964 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.770 | TFLOPs: 29.12 | +7: iteration 930/ 21553 | consumed samples: 238080 | consumed tokens: 487587840 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.633797E+00 | grad norm: 0.791 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.835 | TFLOPs: 29.26 | +7: iteration 940/ 21553 | consumed samples: 240640 | consumed tokens: 492830720 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.618401E+00 | grad norm: 0.656 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.994 | TFLOPs: 28.60 | +7: iteration 950/ 21553 | consumed samples: 243200 | consumed tokens: 498073600 | elapsed time per iteration (s): 0.32 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.636838E+00 | grad norm: 0.938 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.472 | TFLOPs: 28.16 | +7: iteration 960/ 21553 | consumed samples: 245760 | consumed tokens: 503316480 | elapsed time per iteration (s): 0.31 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 3.612357E+00 | grad norm: 0.745 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.916 | TFLOPs: 28.70 | +7: iteration 970/ 21553 | consumed samples: 248320 | consumed tokens: 508559360 | elapsed time per iteration (s): 0.31 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.611376E+00 | grad norm: 0.943 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.134 | TFLOPs: 29.10 | +7: iteration 980/ 21553 | consumed samples: 250880 | consumed tokens: 513802240 | elapsed time per iteration (s): 0.31 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.556848E+00 | grad norm: 0.574 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.571 | TFLOPs: 29.29 | +7: iteration 990/ 21553 | consumed samples: 253440 | consumed tokens: 519045120 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.575815E+00 | grad norm: 0.576 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.527 | TFLOPs: 29.53 | +7: iteration 1000/ 21553 | consumed samples: 256000 | consumed tokens: 524288000 | elapsed time per iteration (s): 0.31 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.556643E+00 | grad norm: 0.763 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.597 | TFLOPs: 29.08 | +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 1000 | lm loss value: 3.494752E+00 | lm loss PPL: 3.294212E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 1000 to checkpoints_146m +0: [2022-11-25 17:37:45,004] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! +0: [2022-11-25 17:37:45,039] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_01-model_00-model_states.pt... +0: [2022-11-25 17:37:45,184] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_01-model_00-model_states.pt. +0: [2022-11-25 17:37:45,184] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_03-model_00-model_states.pt... +0: [2022-11-25 17:37:45,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_03-model_00-model_states.pt. +0: [2022-11-25 17:37:45,200] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_04-model_00-model_states.pt... +0: [2022-11-25 17:37:45,216] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_04-model_00-model_states.pt. +0: [2022-11-25 17:37:45,217] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_05-model_00-model_states.pt... +0: [2022-11-25 17:37:45,232] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_05-model_00-model_states.pt. +0: [2022-11-25 17:37:45,232] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_06-model_00-model_states.pt... +0: [2022-11-25 17:37:45,247] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_06-model_00-model_states.pt. +0: [2022-11-25 17:37:45,247] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_07-model_00-model_states.pt... +0: [2022-11-25 17:37:45,262] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_07-model_00-model_states.pt. +0: [2022-11-25 17:37:45,262] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_08-model_00-model_states.pt... +0: [2022-11-25 17:37:45,278] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_08-model_00-model_states.pt. +0: [2022-11-25 17:37:45,278] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_09-model_00-model_states.pt... +0: [2022-11-25 17:37:45,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_09-model_00-model_states.pt. +0: [2022-11-25 17:37:45,294] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_10-model_00-model_states.pt... +0: [2022-11-25 17:37:45,309] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_10-model_00-model_states.pt. +0: [2022-11-25 17:37:45,309] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_11-model_00-model_states.pt... +0: [2022-11-25 17:37:45,324] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_11-model_00-model_states.pt. +0: [2022-11-25 17:37:45,325] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_12-model_00-model_states.pt... +0: [2022-11-25 17:37:45,340] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_12-model_00-model_states.pt. +0: [2022-11-25 17:37:45,340] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_13-model_00-model_states.pt... +0: [2022-11-25 17:37:45,355] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_13-model_00-model_states.pt. +0: [2022-11-25 17:37:45,355] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_14-model_00-model_states.pt... +0: [2022-11-25 17:37:45,370] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_14-model_00-model_states.pt. +0: [2022-11-25 17:37:45,371] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_15-model_00-model_states.pt... +0: [2022-11-25 17:37:45,386] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_15-model_00-model_states.pt. +0: [2022-11-25 17:37:45,386] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_16-model_00-model_states.pt... +0: [2022-11-25 17:37:45,401] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_16-model_00-model_states.pt. +0: [2022-11-25 17:37:45,401] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_17-model_00-model_states.pt... +0: [2022-11-25 17:37:45,417] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_17-model_00-model_states.pt. +0: [2022-11-25 17:37:45,417] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/layer_19-model_00-model_states.pt... +0: [2022-11-25 17:37:45,418] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/layer_19-model_00-model_states.pt. +0: [2022-11-25 17:37:45,419] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step1000/mp_rank_00_model_states.pt +0: [2022-11-25 17:37:45,419] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/mp_rank_00_model_states.pt... +0: [2022-11-25 17:37:45,440] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/mp_rank_00_model_states.pt. +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:37:45,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:37:45,494] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,494] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,494] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,494] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,494] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,494] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,494] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,495] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,495] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,496] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,496] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,496] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,497] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,497] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,497] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,499] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,499] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,499] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:37:45,513] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,513] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,527] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,527] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,527] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,527] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,527] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,527] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2022-11-25 17:37:45,528] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:37:45,528] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 17:37:45,528] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,529] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,529] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,533] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,533] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,533] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,533] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,533] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,534] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,535] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,535] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,535] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,535] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,535] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:37:45,536] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,537] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,537] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,537] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,537] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,537] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,537] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2022-11-25 17:37:45,537] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:37:45,537] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 17:37:45,537] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,541] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,546] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,546] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:37:45,547] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 17:37:45,547] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2022-11-25 17:37:45,554] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 17:37:45,554] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:37:45,561] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,561] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,561] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +4: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,561] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2022-11-25 17:37:45,564] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:37:45,564] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 17:37:45,564] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: successfully saved checkpoint at iteration 1000 to checkpoints_146m +7: time (ms) | save-checkpoint: 572.97 +7: iteration 1010/ 21553 | consumed samples: 258560 | consumed tokens: 529530880 | elapsed time per iteration (s): 0.38 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.587636E+00 | grad norm: 0.722 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 671.900 | TFLOPs: 23.52 | +7: iteration 1020/ 21553 | consumed samples: 261120 | consumed tokens: 534773760 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.550388E+00 | grad norm: 0.665 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.403 | TFLOPs: 29.53 | +7: iteration 1030/ 21553 | consumed samples: 263680 | consumed tokens: 540016640 | elapsed time per iteration (s): 0.31 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 3.541209E+00 | grad norm: 0.593 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.636 | TFLOPs: 29.29 | +7: iteration 1040/ 21553 | consumed samples: 266240 | consumed tokens: 545259520 | elapsed time per iteration (s): 0.32 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.588026E+00 | grad norm: 0.792 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.211 | TFLOPs: 28.29 | +7: iteration 1050/ 21553 | consumed samples: 268800 | consumed tokens: 550502400 | elapsed time per iteration (s): 0.31 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.567970E+00 | grad norm: 0.771 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.875 | TFLOPs: 28.98 | +7: iteration 1060/ 21553 | consumed samples: 271360 | consumed tokens: 555745280 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.521017E+00 | grad norm: 0.653 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.886 | TFLOPs: 29.72 | +7: iteration 1070/ 21553 | consumed samples: 273920 | consumed tokens: 560988160 | elapsed time per iteration (s): 0.31 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.511333E+00 | grad norm: 0.521 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.484 | TFLOPs: 29.35 | +7: iteration 1080/ 21553 | consumed samples: 276480 | consumed tokens: 566231040 | elapsed time per iteration (s): 0.32 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.458138E+00 | grad norm: 0.630 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 805.386 | TFLOPs: 28.19 | +7: iteration 1090/ 21553 | consumed samples: 279040 | consumed tokens: 571473920 | elapsed time per iteration (s): 0.31 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 3.486948E+00 | grad norm: 0.705 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.752 | TFLOPs: 28.73 | +7: iteration 1100/ 21553 | consumed samples: 281600 | consumed tokens: 576716800 | elapsed time per iteration (s): 0.30 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 3.519971E+00 | grad norm: 0.839 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.525 | TFLOPs: 29.56 | +7: iteration 1110/ 21553 | consumed samples: 284160 | consumed tokens: 581959680 | elapsed time per iteration (s): 0.32 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 3.477241E+00 | grad norm: 0.660 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.756 | TFLOPs: 28.17 | +7: iteration 1120/ 21553 | consumed samples: 286720 | consumed tokens: 587202560 | elapsed time per iteration (s): 0.32 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 3.505001E+00 | grad norm: 0.743 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.878 | TFLOPs: 28.39 | +7: iteration 1130/ 21553 | consumed samples: 289280 | consumed tokens: 592445440 | elapsed time per iteration (s): 0.34 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 3.505209E+00 | grad norm: 0.600 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 757.509 | TFLOPs: 26.52 | +7: iteration 1140/ 21553 | consumed samples: 291840 | consumed tokens: 597688320 | elapsed time per iteration (s): 0.31 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 3.458722E+00 | grad norm: 0.665 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.810 | TFLOPs: 29.36 | +7: iteration 1150/ 21553 | consumed samples: 294400 | consumed tokens: 602931200 | elapsed time per iteration (s): 0.31 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.490778E+00 | grad norm: 0.521 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.485 | TFLOPs: 28.69 | +7: iteration 1160/ 21553 | consumed samples: 296960 | consumed tokens: 608174080 | elapsed time per iteration (s): 0.31 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.469584E+00 | grad norm: 0.717 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.764 | TFLOPs: 29.26 | +7: iteration 1170/ 21553 | consumed samples: 299520 | consumed tokens: 613416960 | elapsed time per iteration (s): 0.31 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.472331E+00 | grad norm: 0.872 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.432 | TFLOPs: 28.83 | +7: iteration 1180/ 21553 | consumed samples: 302080 | consumed tokens: 618659840 | elapsed time per iteration (s): 0.31 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.488086E+00 | grad norm: 0.637 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.411 | TFLOPs: 29.14 | +7: iteration 1190/ 21553 | consumed samples: 304640 | consumed tokens: 623902720 | elapsed time per iteration (s): 0.31 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.485135E+00 | grad norm: 0.570 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.435 | TFLOPs: 28.97 | +7: iteration 1200/ 21553 | consumed samples: 307200 | consumed tokens: 629145600 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 3.452738E+00 | grad norm: 0.552 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.661 | TFLOPs: 29.67 | +7: iteration 1210/ 21553 | consumed samples: 309760 | consumed tokens: 634388480 | elapsed time per iteration (s): 0.33 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 3.441208E+00 | grad norm: 0.583 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 784.923 | TFLOPs: 27.48 | +7: iteration 1220/ 21553 | consumed samples: 312320 | consumed tokens: 639631360 | elapsed time per iteration (s): 0.31 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 3.379491E+00 | grad norm: 0.675 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.936 | TFLOPs: 28.95 | +7: iteration 1230/ 21553 | consumed samples: 314880 | consumed tokens: 644874240 | elapsed time per iteration (s): 0.31 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 3.444969E+00 | grad norm: 0.722 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.298 | TFLOPs: 28.93 | +7: iteration 1240/ 21553 | consumed samples: 317440 | consumed tokens: 650117120 | elapsed time per iteration (s): 0.31 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 3.413920E+00 | grad norm: 0.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.272 | TFLOPs: 28.93 | +7: iteration 1250/ 21553 | consumed samples: 320000 | consumed tokens: 655360000 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 3.414559E+00 | grad norm: 0.560 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.835 | TFLOPs: 29.44 | +7: iteration 1260/ 21553 | consumed samples: 322560 | consumed tokens: 660602880 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 3.353938E+00 | grad norm: 0.555 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.150 | TFLOPs: 29.41 | +7: iteration 1270/ 21553 | consumed samples: 325120 | consumed tokens: 665845760 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 3.414031E+00 | grad norm: 0.667 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.749 | TFLOPs: 29.43 | +7: iteration 1280/ 21553 | consumed samples: 327680 | consumed tokens: 671088640 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 3.415719E+00 | grad norm: 0.664 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.779 | TFLOPs: 29.50 | +7: iteration 1290/ 21553 | consumed samples: 330240 | consumed tokens: 676331520 | elapsed time per iteration (s): 0.31 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 3.295724E+00 | grad norm: 0.606 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.367 | TFLOPs: 28.65 | +7: iteration 1300/ 21553 | consumed samples: 332800 | consumed tokens: 681574400 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 3.378904E+00 | grad norm: 0.594 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.549 | TFLOPs: 29.64 | +7: iteration 1310/ 21553 | consumed samples: 335360 | consumed tokens: 686817280 | elapsed time per iteration (s): 0.30 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 3.356820E+00 | grad norm: 0.592 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.069 | TFLOPs: 29.72 | +7: iteration 1320/ 21553 | consumed samples: 337920 | consumed tokens: 692060160 | elapsed time per iteration (s): 0.31 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 3.354152E+00 | grad norm: 0.621 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.015 | TFLOPs: 28.71 | +7: iteration 1330/ 21553 | consumed samples: 340480 | consumed tokens: 697303040 | elapsed time per iteration (s): 0.31 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 3.357187E+00 | grad norm: 0.517 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.153 | TFLOPs: 28.96 | +7: iteration 1340/ 21553 | consumed samples: 343040 | consumed tokens: 702545920 | elapsed time per iteration (s): 0.30 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 3.367592E+00 | grad norm: 0.626 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.415 | TFLOPs: 29.56 | +7: iteration 1350/ 21553 | consumed samples: 345600 | consumed tokens: 707788800 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 3.343414E+00 | grad norm: 0.636 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.577 | TFLOPs: 29.50 | +7: iteration 1360/ 21553 | consumed samples: 348160 | consumed tokens: 713031680 | elapsed time per iteration (s): 0.31 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 3.305142E+00 | grad norm: 0.679 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.745 | TFLOPs: 28.77 | +7: iteration 1370/ 21553 | consumed samples: 350720 | consumed tokens: 718274560 | elapsed time per iteration (s): 0.31 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 3.343846E+00 | grad norm: 0.687 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.225 | TFLOPs: 29.34 | +7: iteration 1380/ 21553 | consumed samples: 353280 | consumed tokens: 723517440 | elapsed time per iteration (s): 0.31 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 3.345867E+00 | grad norm: 0.709 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.245 | TFLOPs: 28.64 | +7: iteration 1390/ 21553 | consumed samples: 355840 | consumed tokens: 728760320 | elapsed time per iteration (s): 0.31 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 3.340464E+00 | grad norm: 0.525 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.401 | TFLOPs: 28.90 | +7: iteration 1400/ 21553 | consumed samples: 358400 | consumed tokens: 734003200 | elapsed time per iteration (s): 0.30 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 3.377070E+00 | grad norm: 0.572 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.562 | TFLOPs: 29.60 | +7: iteration 1410/ 21553 | consumed samples: 360960 | consumed tokens: 739246080 | elapsed time per iteration (s): 0.31 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 3.335453E+00 | grad norm: 0.522 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.911 | TFLOPs: 28.81 | +7: iteration 1420/ 21553 | consumed samples: 363520 | consumed tokens: 744488960 | elapsed time per iteration (s): 0.30 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 3.284750E+00 | grad norm: 0.540 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.050 | TFLOPs: 29.55 | +7: iteration 1430/ 21553 | consumed samples: 366080 | consumed tokens: 749731840 | elapsed time per iteration (s): 0.31 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 3.288171E+00 | grad norm: 0.565 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.999 | TFLOPs: 28.74 | +7: iteration 1440/ 21553 | consumed samples: 368640 | consumed tokens: 754974720 | elapsed time per iteration (s): 0.30 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 3.318149E+00 | grad norm: 0.501 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.497 | TFLOPs: 29.67 | +7: iteration 1450/ 21553 | consumed samples: 371200 | consumed tokens: 760217600 | elapsed time per iteration (s): 0.31 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 3.307382E+00 | grad norm: 0.647 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.601 | TFLOPs: 28.97 | +7: iteration 1460/ 21553 | consumed samples: 373760 | consumed tokens: 765460480 | elapsed time per iteration (s): 0.31 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 3.323151E+00 | grad norm: 0.628 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.782 | TFLOPs: 28.59 | +7: iteration 1470/ 21553 | consumed samples: 376320 | consumed tokens: 770703360 | elapsed time per iteration (s): 0.32 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 3.286049E+00 | grad norm: 0.647 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.172 | TFLOPs: 27.91 | +7: iteration 1480/ 21553 | consumed samples: 378880 | consumed tokens: 775946240 | elapsed time per iteration (s): 0.30 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 3.307124E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.931 | TFLOPs: 29.54 | +7: iteration 1490/ 21553 | consumed samples: 381440 | consumed tokens: 781189120 | elapsed time per iteration (s): 0.30 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 3.320826E+00 | grad norm: 0.516 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.148 | TFLOPs: 29.73 | +7: iteration 1500/ 21553 | consumed samples: 384000 | consumed tokens: 786432000 | elapsed time per iteration (s): 0.31 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 3.296010E+00 | grad norm: 0.802 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.290 | TFLOPs: 29.10 | +7: iteration 1510/ 21553 | consumed samples: 386560 | consumed tokens: 791674880 | elapsed time per iteration (s): 0.31 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 3.307404E+00 | grad norm: 0.562 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.332 | TFLOPs: 29.31 | +7: iteration 1520/ 21553 | consumed samples: 389120 | consumed tokens: 796917760 | elapsed time per iteration (s): 0.31 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 3.301585E+00 | grad norm: 0.831 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.682 | TFLOPs: 29.04 | +7: iteration 1530/ 21553 | consumed samples: 391680 | consumed tokens: 802160640 | elapsed time per iteration (s): 0.31 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 3.285704E+00 | grad norm: 0.523 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.957 | TFLOPs: 28.84 | +7: iteration 1540/ 21553 | consumed samples: 394240 | consumed tokens: 807403520 | elapsed time per iteration (s): 0.31 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 3.299588E+00 | grad norm: 0.634 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.244 | TFLOPs: 29.38 | +7: iteration 1550/ 21553 | consumed samples: 396800 | consumed tokens: 812646400 | elapsed time per iteration (s): 0.31 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 3.268024E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.136 | TFLOPs: 28.50 | +7: iteration 1560/ 21553 | consumed samples: 399360 | consumed tokens: 817889280 | elapsed time per iteration (s): 0.31 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 3.246824E+00 | grad norm: 0.502 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.665 | TFLOPs: 28.80 | +7: iteration 1570/ 21553 | consumed samples: 401920 | consumed tokens: 823132160 | elapsed time per iteration (s): 0.31 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 3.238808E+00 | grad norm: 0.535 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.616 | TFLOPs: 29.29 | +7: iteration 1580/ 21553 | consumed samples: 404480 | consumed tokens: 828375040 | elapsed time per iteration (s): 0.31 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 3.268457E+00 | grad norm: 0.517 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.356 | TFLOPs: 28.68 | +7: iteration 1590/ 21553 | consumed samples: 407040 | consumed tokens: 833617920 | elapsed time per iteration (s): 0.30 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 3.271099E+00 | grad norm: 0.541 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.155 | TFLOPs: 29.45 | +7: iteration 1600/ 21553 | consumed samples: 409600 | consumed tokens: 838860800 | elapsed time per iteration (s): 0.31 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 3.242576E+00 | grad norm: 0.554 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.504 | TFLOPs: 28.97 | +7: iteration 1610/ 21553 | consumed samples: 412160 | consumed tokens: 844103680 | elapsed time per iteration (s): 0.30 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 3.222989E+00 | grad norm: 0.605 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.384 | TFLOPs: 29.56 | +7: iteration 1620/ 21553 | consumed samples: 414720 | consumed tokens: 849346560 | elapsed time per iteration (s): 0.32 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 3.222706E+00 | grad norm: 0.547 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 788.890 | TFLOPs: 27.62 | +7: iteration 1630/ 21553 | consumed samples: 417280 | consumed tokens: 854589440 | elapsed time per iteration (s): 0.31 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 3.237730E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.691 | TFLOPs: 29.33 | +7: iteration 1640/ 21553 | consumed samples: 419840 | consumed tokens: 859832320 | elapsed time per iteration (s): 0.32 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 3.212426E+00 | grad norm: 0.662 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.120 | TFLOPs: 28.40 | +7: iteration 1650/ 21553 | consumed samples: 422400 | consumed tokens: 865075200 | elapsed time per iteration (s): 0.33 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 3.276519E+00 | grad norm: 0.548 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 781.023 | TFLOPs: 27.34 | +7: iteration 1660/ 21553 | consumed samples: 424960 | consumed tokens: 870318080 | elapsed time per iteration (s): 0.31 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 3.196770E+00 | grad norm: 0.530 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.799 | TFLOPs: 29.26 | +7: iteration 1670/ 21553 | consumed samples: 427520 | consumed tokens: 875560960 | elapsed time per iteration (s): 0.31 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 3.230863E+00 | grad norm: 0.480 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.709 | TFLOPs: 28.66 | +7: iteration 1680/ 21553 | consumed samples: 430080 | consumed tokens: 880803840 | elapsed time per iteration (s): 0.30 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 3.206603E+00 | grad norm: 0.714 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.077 | TFLOPs: 29.44 | +7: iteration 1690/ 21553 | consumed samples: 432640 | consumed tokens: 886046720 | elapsed time per iteration (s): 0.31 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 3.259213E+00 | grad norm: 0.531 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.396 | TFLOPs: 28.61 | +7: iteration 1700/ 21553 | consumed samples: 435200 | consumed tokens: 891289600 | elapsed time per iteration (s): 0.31 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 3.251443E+00 | grad norm: 0.517 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.952 | TFLOPs: 29.37 | +7: iteration 1710/ 21553 | consumed samples: 437760 | consumed tokens: 896532480 | elapsed time per iteration (s): 0.30 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 3.214177E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.264 | TFLOPs: 29.98 | +7: iteration 1720/ 21553 | consumed samples: 440320 | consumed tokens: 901775360 | elapsed time per iteration (s): 0.31 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 3.209499E+00 | grad norm: 0.532 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.052 | TFLOPs: 28.81 | +7: iteration 1730/ 21553 | consumed samples: 442880 | consumed tokens: 907018240 | elapsed time per iteration (s): 0.30 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 3.201115E+00 | grad norm: 0.727 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.384 | TFLOPs: 29.42 | +7: iteration 1740/ 21553 | consumed samples: 445440 | consumed tokens: 912261120 | elapsed time per iteration (s): 0.31 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 3.249291E+00 | grad norm: 0.546 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.503 | TFLOPs: 28.93 | +7: iteration 1750/ 21553 | consumed samples: 448000 | consumed tokens: 917504000 | elapsed time per iteration (s): 0.30 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 3.212261E+00 | grad norm: 0.495 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.085 | TFLOPs: 29.51 | +7: iteration 1760/ 21553 | consumed samples: 450560 | consumed tokens: 922746880 | elapsed time per iteration (s): 0.31 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 3.222628E+00 | grad norm: 0.502 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.917 | TFLOPs: 29.09 | +7: iteration 1770/ 21553 | consumed samples: 453120 | consumed tokens: 927989760 | elapsed time per iteration (s): 0.31 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 3.186160E+00 | grad norm: 0.501 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.302 | TFLOPs: 28.93 | +7: iteration 1780/ 21553 | consumed samples: 455680 | consumed tokens: 933232640 | elapsed time per iteration (s): 0.31 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 3.172374E+00 | grad norm: 0.523 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.943 | TFLOPs: 29.02 | +7: iteration 1790/ 21553 | consumed samples: 458240 | consumed tokens: 938475520 | elapsed time per iteration (s): 0.31 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 3.226737E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.896 | TFLOPs: 28.81 | +7: iteration 1800/ 21553 | consumed samples: 460800 | consumed tokens: 943718400 | elapsed time per iteration (s): 0.31 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 3.170218E+00 | grad norm: 0.612 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.980 | TFLOPs: 29.37 | +7: iteration 1810/ 21553 | consumed samples: 463360 | consumed tokens: 948961280 | elapsed time per iteration (s): 0.31 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 3.206316E+00 | grad norm: 0.581 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.325 | TFLOPs: 28.75 | +7: iteration 1820/ 21553 | consumed samples: 465920 | consumed tokens: 954204160 | elapsed time per iteration (s): 0.30 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 3.182804E+00 | grad norm: 0.494 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.465 | TFLOPs: 29.81 | +7: iteration 1830/ 21553 | consumed samples: 468480 | consumed tokens: 959447040 | elapsed time per iteration (s): 0.30 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 3.169984E+00 | grad norm: 0.533 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.671 | TFLOPs: 29.57 | +7: iteration 1840/ 21553 | consumed samples: 471040 | consumed tokens: 964689920 | elapsed time per iteration (s): 0.31 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 3.169182E+00 | grad norm: 0.522 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.283 | TFLOPs: 28.86 | +7: iteration 1850/ 21553 | consumed samples: 473600 | consumed tokens: 969932800 | elapsed time per iteration (s): 0.30 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 3.177455E+00 | grad norm: 0.493 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.909 | TFLOPs: 29.61 | +7: iteration 1860/ 21553 | consumed samples: 476160 | consumed tokens: 975175680 | elapsed time per iteration (s): 0.31 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 3.135427E+00 | grad norm: 0.502 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.328 | TFLOPs: 29.14 | +7: iteration 1870/ 21553 | consumed samples: 478720 | consumed tokens: 980418560 | elapsed time per iteration (s): 0.31 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 3.146823E+00 | grad norm: 0.504 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.697 | TFLOPs: 29.12 | +7: iteration 1880/ 21553 | consumed samples: 481280 | consumed tokens: 985661440 | elapsed time per iteration (s): 0.31 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 3.128756E+00 | grad norm: 0.498 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.595 | TFLOPs: 29.36 | +7: iteration 1890/ 21553 | consumed samples: 483840 | consumed tokens: 990904320 | elapsed time per iteration (s): 0.31 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 3.180097E+00 | grad norm: 0.529 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.485 | TFLOPs: 28.86 | +7: iteration 1900/ 21553 | consumed samples: 486400 | consumed tokens: 996147200 | elapsed time per iteration (s): 0.30 | learning rate: 1.972E-04 | global batch size: 256 | lm loss: 3.117027E+00 | grad norm: 0.643 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.832 | TFLOPs: 29.40 | +7: iteration 1910/ 21553 | consumed samples: 488960 | consumed tokens: 1001390080 | elapsed time per iteration (s): 0.30 | learning rate: 1.972E-04 | global batch size: 256 | lm loss: 3.174645E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.067 | TFLOPs: 29.65 | +7: iteration 1920/ 21553 | consumed samples: 491520 | consumed tokens: 1006632960 | elapsed time per iteration (s): 0.32 | learning rate: 1.972E-04 | global batch size: 256 | lm loss: 3.134954E+00 | grad norm: 0.566 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.423 | TFLOPs: 28.30 | +7: iteration 1930/ 21553 | consumed samples: 494080 | consumed tokens: 1011875840 | elapsed time per iteration (s): 0.31 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 3.148380E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.858 | TFLOPs: 29.16 | +7: iteration 1940/ 21553 | consumed samples: 496640 | consumed tokens: 1017118720 | elapsed time per iteration (s): 0.31 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 3.136999E+00 | grad norm: 0.671 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.275 | TFLOPs: 28.93 | +7: iteration 1950/ 21553 | consumed samples: 499200 | consumed tokens: 1022361600 | elapsed time per iteration (s): 0.31 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 3.118780E+00 | grad norm: 0.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.531 | TFLOPs: 29.14 | +7: iteration 1960/ 21553 | consumed samples: 501760 | consumed tokens: 1027604480 | elapsed time per iteration (s): 0.31 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 3.176503E+00 | grad norm: 0.588 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.410 | TFLOPs: 28.65 | +7: iteration 1970/ 21553 | consumed samples: 504320 | consumed tokens: 1032847360 | elapsed time per iteration (s): 0.30 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 3.176288E+00 | grad norm: 0.531 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.491 | TFLOPs: 29.60 | +7: iteration 1980/ 21553 | consumed samples: 506880 | consumed tokens: 1038090240 | elapsed time per iteration (s): 0.30 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 3.142701E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.718 | TFLOPs: 29.50 | +7: iteration 1990/ 21553 | consumed samples: 509440 | consumed tokens: 1043333120 | elapsed time per iteration (s): 0.31 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.170610E+00 | grad norm: 0.522 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.953 | TFLOPs: 29.19 | +0: [2022-11-25 17:42:54,289] [INFO] [logging.py:68:log_dist] [Rank 0] step=2000, skipped=0, lr=[0.00019691153209285103, 0.00019691153209285103, 0.00019691153209285103], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 2000/ 21553 | consumed samples: 512000 | consumed tokens: 1048576000 | elapsed time per iteration (s): 0.30 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.162373E+00 | grad norm: 0.530 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.859 | TFLOPs: 29.40 | +0: steps: 2000 loss: 3.2415 iter time (s): 0.314 samples/sec: 814.312 +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 2000 | lm loss value: 3.029966E+00 | lm loss PPL: 2.069654E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 2000 to checkpoints_146m +0: [2022-11-25 17:42:54,422] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step2000 is begin to save! +0: [2022-11-25 17:42:54,425] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_01-model_00-model_states.pt... +0: [2022-11-25 17:42:54,508] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_01-model_00-model_states.pt. +0: [2022-11-25 17:42:54,509] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_03-model_00-model_states.pt... +0: [2022-11-25 17:42:54,524] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_03-model_00-model_states.pt. +0: [2022-11-25 17:42:54,524] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_04-model_00-model_states.pt... +0: [2022-11-25 17:42:54,539] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_04-model_00-model_states.pt. +0: [2022-11-25 17:42:54,539] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_05-model_00-model_states.pt... +0: [2022-11-25 17:42:54,554] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_05-model_00-model_states.pt. +0: [2022-11-25 17:42:54,554] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_06-model_00-model_states.pt... +0: [2022-11-25 17:42:54,568] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_06-model_00-model_states.pt. +0: [2022-11-25 17:42:54,568] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_07-model_00-model_states.pt... +0: [2022-11-25 17:42:54,583] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_07-model_00-model_states.pt. +0: [2022-11-25 17:42:54,583] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_08-model_00-model_states.pt... +0: [2022-11-25 17:42:54,598] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_08-model_00-model_states.pt. +0: [2022-11-25 17:42:54,598] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_09-model_00-model_states.pt... +0: [2022-11-25 17:42:54,612] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_09-model_00-model_states.pt. +0: [2022-11-25 17:42:54,613] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_10-model_00-model_states.pt... +0: [2022-11-25 17:42:54,627] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_10-model_00-model_states.pt. +0: [2022-11-25 17:42:54,627] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_11-model_00-model_states.pt... +0: [2022-11-25 17:42:54,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_11-model_00-model_states.pt. +0: [2022-11-25 17:42:54,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_12-model_00-model_states.pt... +0: [2022-11-25 17:42:54,657] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_12-model_00-model_states.pt. +0: [2022-11-25 17:42:54,657] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_13-model_00-model_states.pt... +0: [2022-11-25 17:42:54,672] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_13-model_00-model_states.pt. +0: [2022-11-25 17:42:54,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_14-model_00-model_states.pt... +0: [2022-11-25 17:42:54,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_14-model_00-model_states.pt. +0: [2022-11-25 17:42:54,687] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_15-model_00-model_states.pt... +0: [2022-11-25 17:42:54,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_15-model_00-model_states.pt. +0: [2022-11-25 17:42:54,701] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_16-model_00-model_states.pt... +0: [2022-11-25 17:42:54,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_16-model_00-model_states.pt. +0: [2022-11-25 17:42:54,716] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_17-model_00-model_states.pt... +0: [2022-11-25 17:42:54,731] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_17-model_00-model_states.pt. +0: [2022-11-25 17:42:54,731] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/layer_19-model_00-model_states.pt... +0: [2022-11-25 17:42:54,732] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/layer_19-model_00-model_states.pt. +0: [2022-11-25 17:42:54,733] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step2000/mp_rank_00_model_states.pt +0: [2022-11-25 17:42:54,733] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/mp_rank_00_model_states.pt... +0: [2022-11-25 17:42:54,735] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/mp_rank_00_model_states.pt. +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:42:54,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:42:54,785] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,785] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,785] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +5: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:42:54,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2022-11-25 17:42:54,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:42:54,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2022-11-25 17:42:54,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 17:42:54,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2022-11-25 17:42:54,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:42:54,803] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 17:42:54,803] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,803] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,803] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:42:54,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,790] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2022-11-25 17:42:54,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:42:54,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 17:42:54,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2022-11-25 17:42:54,813] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 17:42:54,813] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2022-11-25 17:42:54,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:42:54,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 17:42:54,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: successfully saved checkpoint at iteration 2000 to checkpoints_146m +7: time (ms) | save-checkpoint: 407.42 +7: iteration 2010/ 21553 | consumed samples: 514560 | consumed tokens: 1053818880 | elapsed time per iteration (s): 0.36 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.172287E+00 | grad norm: 0.485 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 710.599 | TFLOPs: 24.88 | +7: iteration 2020/ 21553 | consumed samples: 517120 | consumed tokens: 1059061760 | elapsed time per iteration (s): 0.30 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.183387E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.727 | TFLOPs: 29.47 | +7: iteration 2030/ 21553 | consumed samples: 519680 | consumed tokens: 1064304640 | elapsed time per iteration (s): 0.30 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.104433E+00 | grad norm: 0.778 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.066 | TFLOPs: 29.55 | +7: iteration 2040/ 21553 | consumed samples: 522240 | consumed tokens: 1069547520 | elapsed time per iteration (s): 0.31 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.136581E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.744 | TFLOPs: 29.22 | +7: iteration 2050/ 21553 | consumed samples: 524800 | consumed tokens: 1074790400 | elapsed time per iteration (s): 0.30 | learning rate: 1.967E-04 | global batch size: 256 | lm loss: 3.159616E+00 | grad norm: 0.485 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.569 | TFLOPs: 29.39 | +7: iteration 2060/ 21553 | consumed samples: 527360 | consumed tokens: 1080033280 | elapsed time per iteration (s): 0.30 | learning rate: 1.967E-04 | global batch size: 256 | lm loss: 3.128757E+00 | grad norm: 0.549 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.678 | TFLOPs: 29.50 | +7: iteration 2070/ 21553 | consumed samples: 529920 | consumed tokens: 1085276160 | elapsed time per iteration (s): 0.31 | learning rate: 1.967E-04 | global batch size: 256 | lm loss: 3.105647E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.756 | TFLOPs: 28.73 | +7: iteration 2080/ 21553 | consumed samples: 532480 | consumed tokens: 1090519040 | elapsed time per iteration (s): 0.31 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.128176E+00 | grad norm: 0.495 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.712 | TFLOPs: 29.29 | +7: iteration 2090/ 21553 | consumed samples: 535040 | consumed tokens: 1095761920 | elapsed time per iteration (s): 0.31 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.118604E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.882 | TFLOPs: 29.30 | +7: iteration 2100/ 21553 | consumed samples: 537600 | consumed tokens: 1101004800 | elapsed time per iteration (s): 0.31 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.108379E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.871 | TFLOPs: 28.60 | +7: iteration 2110/ 21553 | consumed samples: 540160 | consumed tokens: 1106247680 | elapsed time per iteration (s): 0.31 | learning rate: 1.965E-04 | global batch size: 256 | lm loss: 3.092039E+00 | grad norm: 0.603 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.782 | TFLOPs: 29.01 | +7: iteration 2120/ 21553 | consumed samples: 542720 | consumed tokens: 1111490560 | elapsed time per iteration (s): 0.32 | learning rate: 1.965E-04 | global batch size: 256 | lm loss: 3.111482E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.008 | TFLOPs: 28.15 | +7: iteration 2130/ 21553 | consumed samples: 545280 | consumed tokens: 1116733440 | elapsed time per iteration (s): 0.31 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.111909E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.207 | TFLOPs: 28.78 | +7: iteration 2140/ 21553 | consumed samples: 547840 | consumed tokens: 1121976320 | elapsed time per iteration (s): 0.31 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.105052E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.999 | TFLOPs: 29.20 | +7: iteration 2150/ 21553 | consumed samples: 550400 | consumed tokens: 1127219200 | elapsed time per iteration (s): 0.31 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.093842E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.863 | TFLOPs: 28.63 | +7: iteration 2160/ 21553 | consumed samples: 552960 | consumed tokens: 1132462080 | elapsed time per iteration (s): 0.31 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.098286E+00 | grad norm: 0.510 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.512 | TFLOPs: 29.00 | +7: iteration 2170/ 21553 | consumed samples: 555520 | consumed tokens: 1137704960 | elapsed time per iteration (s): 0.31 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.116716E+00 | grad norm: 0.573 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.191 | TFLOPs: 28.57 | +7: iteration 2180/ 21553 | consumed samples: 558080 | consumed tokens: 1142947840 | elapsed time per iteration (s): 0.31 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.124041E+00 | grad norm: 0.509 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.819 | TFLOPs: 29.36 | +7: iteration 2190/ 21553 | consumed samples: 560640 | consumed tokens: 1148190720 | elapsed time per iteration (s): 0.33 | learning rate: 1.962E-04 | global batch size: 256 | lm loss: 3.097311E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 777.831 | TFLOPs: 27.23 | +7: iteration 2200/ 21553 | consumed samples: 563200 | consumed tokens: 1153433600 | elapsed time per iteration (s): 0.31 | learning rate: 1.962E-04 | global batch size: 256 | lm loss: 3.120233E+00 | grad norm: 0.491 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.607 | TFLOPs: 29.04 | +7: iteration 2210/ 21553 | consumed samples: 565760 | consumed tokens: 1158676480 | elapsed time per iteration (s): 0.31 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.069082E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.719 | TFLOPs: 29.19 | +7: iteration 2220/ 21553 | consumed samples: 568320 | consumed tokens: 1163919360 | elapsed time per iteration (s): 0.31 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.138517E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.888 | TFLOPs: 29.05 | +7: iteration 2230/ 21553 | consumed samples: 570880 | consumed tokens: 1169162240 | elapsed time per iteration (s): 0.31 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.090035E+00 | grad norm: 0.481 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.711 | TFLOPs: 29.22 | +7: iteration 2240/ 21553 | consumed samples: 573440 | consumed tokens: 1174405120 | elapsed time per iteration (s): 0.31 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.118276E+00 | grad norm: 0.473 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.300 | TFLOPs: 29.24 | +7: iteration 2250/ 21553 | consumed samples: 576000 | consumed tokens: 1179648000 | elapsed time per iteration (s): 0.31 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.078548E+00 | grad norm: 0.559 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.671 | TFLOPs: 28.55 | +7: iteration 2260/ 21553 | consumed samples: 578560 | consumed tokens: 1184890880 | elapsed time per iteration (s): 0.31 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.043895E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.250 | TFLOPs: 28.96 | +7: iteration 2270/ 21553 | consumed samples: 581120 | consumed tokens: 1190133760 | elapsed time per iteration (s): 0.32 | learning rate: 1.959E-04 | global batch size: 256 | lm loss: 3.046649E+00 | grad norm: 0.578 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.619 | TFLOPs: 27.92 | +7: iteration 2280/ 21553 | consumed samples: 583680 | consumed tokens: 1195376640 | elapsed time per iteration (s): 0.33 | learning rate: 1.959E-04 | global batch size: 256 | lm loss: 3.071298E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 786.033 | TFLOPs: 27.52 | +7: iteration 2290/ 21553 | consumed samples: 586240 | consumed tokens: 1200619520 | elapsed time per iteration (s): 0.31 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.103015E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.019 | TFLOPs: 28.74 | +7: iteration 2300/ 21553 | consumed samples: 588800 | consumed tokens: 1205862400 | elapsed time per iteration (s): 0.31 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.077860E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.811 | TFLOPs: 29.19 | +7: iteration 2310/ 21553 | consumed samples: 591360 | consumed tokens: 1211105280 | elapsed time per iteration (s): 0.31 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.098472E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.416 | TFLOPs: 28.69 | +7: iteration 2320/ 21553 | consumed samples: 593920 | consumed tokens: 1216348160 | elapsed time per iteration (s): 0.31 | learning rate: 1.957E-04 | global batch size: 256 | lm loss: 3.091747E+00 | grad norm: 0.490 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.277 | TFLOPs: 28.82 | +7: iteration 2330/ 21553 | consumed samples: 596480 | consumed tokens: 1221591040 | elapsed time per iteration (s): 0.31 | learning rate: 1.957E-04 | global batch size: 256 | lm loss: 3.018003E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.246 | TFLOPs: 28.82 | +7: iteration 2340/ 21553 | consumed samples: 599040 | consumed tokens: 1226833920 | elapsed time per iteration (s): 0.31 | learning rate: 1.956E-04 | global batch size: 256 | lm loss: 3.033192E+00 | grad norm: 0.488 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.645 | TFLOPs: 29.15 | +7: iteration 2350/ 21553 | consumed samples: 601600 | consumed tokens: 1232076800 | elapsed time per iteration (s): 0.31 | learning rate: 1.956E-04 | global batch size: 256 | lm loss: 3.062652E+00 | grad norm: 0.476 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.983 | TFLOPs: 28.88 | +7: iteration 2360/ 21553 | consumed samples: 604160 | consumed tokens: 1237319680 | elapsed time per iteration (s): 0.32 | learning rate: 1.956E-04 | global batch size: 256 | lm loss: 3.081550E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.546 | TFLOPs: 28.34 | +7: iteration 2370/ 21553 | consumed samples: 606720 | consumed tokens: 1242562560 | elapsed time per iteration (s): 0.30 | learning rate: 1.955E-04 | global batch size: 256 | lm loss: 3.090641E+00 | grad norm: 0.524 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.041 | TFLOPs: 29.58 | +7: iteration 2380/ 21553 | consumed samples: 609280 | consumed tokens: 1247805440 | elapsed time per iteration (s): 0.31 | learning rate: 1.955E-04 | global batch size: 256 | lm loss: 3.071224E+00 | grad norm: 0.626 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.059 | TFLOPs: 28.71 | +7: iteration 2390/ 21553 | consumed samples: 611840 | consumed tokens: 1253048320 | elapsed time per iteration (s): 0.30 | learning rate: 1.954E-04 | global batch size: 256 | lm loss: 3.060693E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.870 | TFLOPs: 29.75 | +7: iteration 2400/ 21553 | consumed samples: 614400 | consumed tokens: 1258291200 | elapsed time per iteration (s): 0.31 | learning rate: 1.954E-04 | global batch size: 256 | lm loss: 3.104784E+00 | grad norm: 0.464 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.906 | TFLOPs: 29.09 | +7: iteration 2410/ 21553 | consumed samples: 616960 | consumed tokens: 1263534080 | elapsed time per iteration (s): 0.31 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.067302E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.537 | TFLOPs: 29.04 | +7: iteration 2420/ 21553 | consumed samples: 619520 | consumed tokens: 1268776960 | elapsed time per iteration (s): 0.30 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.056548E+00 | grad norm: 0.481 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.382 | TFLOPs: 29.52 | +7: iteration 2430/ 21553 | consumed samples: 622080 | consumed tokens: 1274019840 | elapsed time per iteration (s): 0.30 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.061819E+00 | grad norm: 0.580 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.725 | TFLOPs: 29.40 | +7: iteration 2440/ 21553 | consumed samples: 624640 | consumed tokens: 1279262720 | elapsed time per iteration (s): 0.30 | learning rate: 1.952E-04 | global batch size: 256 | lm loss: 3.033047E+00 | grad norm: 0.485 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.792 | TFLOPs: 29.43 | +7: iteration 2450/ 21553 | consumed samples: 627200 | consumed tokens: 1284505600 | elapsed time per iteration (s): 0.31 | learning rate: 1.952E-04 | global batch size: 256 | lm loss: 3.056227E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.504 | TFLOPs: 28.72 | +7: iteration 2460/ 21553 | consumed samples: 629760 | consumed tokens: 1289748480 | elapsed time per iteration (s): 0.30 | learning rate: 1.951E-04 | global batch size: 256 | lm loss: 3.081287E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.020 | TFLOPs: 29.65 | +7: iteration 2470/ 21553 | consumed samples: 632320 | consumed tokens: 1294991360 | elapsed time per iteration (s): 0.31 | learning rate: 1.951E-04 | global batch size: 256 | lm loss: 3.046768E+00 | grad norm: 0.536 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.092 | TFLOPs: 28.88 | +7: iteration 2480/ 21553 | consumed samples: 634880 | consumed tokens: 1300234240 | elapsed time per iteration (s): 0.31 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.026620E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.783 | TFLOPs: 28.77 | +7: iteration 2490/ 21553 | consumed samples: 637440 | consumed tokens: 1305477120 | elapsed time per iteration (s): 0.30 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.024126E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.478 | TFLOPs: 29.84 | +7: iteration 2500/ 21553 | consumed samples: 640000 | consumed tokens: 1310720000 | elapsed time per iteration (s): 0.30 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.054005E+00 | grad norm: 0.659 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.217 | TFLOPs: 29.41 | +7: iteration 2510/ 21553 | consumed samples: 642560 | consumed tokens: 1315962880 | elapsed time per iteration (s): 0.31 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 3.067900E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.479 | TFLOPs: 28.83 | +7: iteration 2520/ 21553 | consumed samples: 645120 | consumed tokens: 1321205760 | elapsed time per iteration (s): 0.30 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 3.019973E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.713 | TFLOPs: 29.43 | +7: iteration 2530/ 21553 | consumed samples: 647680 | consumed tokens: 1326448640 | elapsed time per iteration (s): 0.31 | learning rate: 1.948E-04 | global batch size: 256 | lm loss: 3.034238E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.008 | TFLOPs: 29.06 | +7: iteration 2540/ 21553 | consumed samples: 650240 | consumed tokens: 1331691520 | elapsed time per iteration (s): 0.31 | learning rate: 1.948E-04 | global batch size: 256 | lm loss: 3.015898E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.595 | TFLOPs: 28.94 | +7: iteration 2550/ 21553 | consumed samples: 652800 | consumed tokens: 1336934400 | elapsed time per iteration (s): 0.30 | learning rate: 1.947E-04 | global batch size: 256 | lm loss: 2.991460E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.475 | TFLOPs: 29.46 | +7: iteration 2560/ 21553 | consumed samples: 655360 | consumed tokens: 1342177280 | elapsed time per iteration (s): 0.30 | learning rate: 1.947E-04 | global batch size: 256 | lm loss: 3.006081E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.226 | TFLOPs: 29.48 | +7: iteration 2570/ 21553 | consumed samples: 657920 | consumed tokens: 1347420160 | elapsed time per iteration (s): 0.31 | learning rate: 1.946E-04 | global batch size: 256 | lm loss: 3.020905E+00 | grad norm: 0.484 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.315 | TFLOPs: 28.51 | +7: iteration 2580/ 21553 | consumed samples: 660480 | consumed tokens: 1352663040 | elapsed time per iteration (s): 0.31 | learning rate: 1.946E-04 | global batch size: 256 | lm loss: 3.025756E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.291 | TFLOPs: 29.31 | +7: iteration 2590/ 21553 | consumed samples: 663040 | consumed tokens: 1357905920 | elapsed time per iteration (s): 0.31 | learning rate: 1.946E-04 | global batch size: 256 | lm loss: 3.046802E+00 | grad norm: 0.521 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.231 | TFLOPs: 28.68 | +7: iteration 2600/ 21553 | consumed samples: 665600 | consumed tokens: 1363148800 | elapsed time per iteration (s): 0.31 | learning rate: 1.945E-04 | global batch size: 256 | lm loss: 2.995874E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.537 | TFLOPs: 28.86 | +7: iteration 2610/ 21553 | consumed samples: 668160 | consumed tokens: 1368391680 | elapsed time per iteration (s): 0.33 | learning rate: 1.945E-04 | global batch size: 256 | lm loss: 3.034709E+00 | grad norm: 0.500 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 773.338 | TFLOPs: 27.07 | +7: iteration 2620/ 21553 | consumed samples: 670720 | consumed tokens: 1373634560 | elapsed time per iteration (s): 0.31 | learning rate: 1.944E-04 | global batch size: 256 | lm loss: 3.043639E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.353 | TFLOPs: 29.10 | +7: iteration 2630/ 21553 | consumed samples: 673280 | consumed tokens: 1378877440 | elapsed time per iteration (s): 0.31 | learning rate: 1.944E-04 | global batch size: 256 | lm loss: 3.017611E+00 | grad norm: 0.461 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.961 | TFLOPs: 29.23 | +7: iteration 2640/ 21553 | consumed samples: 675840 | consumed tokens: 1384120320 | elapsed time per iteration (s): 0.31 | learning rate: 1.943E-04 | global batch size: 256 | lm loss: 3.065626E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.119 | TFLOPs: 29.38 | +7: iteration 2650/ 21553 | consumed samples: 678400 | consumed tokens: 1389363200 | elapsed time per iteration (s): 0.31 | learning rate: 1.943E-04 | global batch size: 256 | lm loss: 3.030737E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.759 | TFLOPs: 29.22 | +7: iteration 2660/ 21553 | consumed samples: 680960 | consumed tokens: 1394606080 | elapsed time per iteration (s): 0.32 | learning rate: 1.942E-04 | global batch size: 256 | lm loss: 3.001879E+00 | grad norm: 0.464 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.988 | TFLOPs: 28.01 | +7: iteration 2670/ 21553 | consumed samples: 683520 | consumed tokens: 1399848960 | elapsed time per iteration (s): 0.32 | learning rate: 1.942E-04 | global batch size: 256 | lm loss: 3.008892E+00 | grad norm: 0.490 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.711 | TFLOPs: 28.00 | +7: iteration 2680/ 21553 | consumed samples: 686080 | consumed tokens: 1405091840 | elapsed time per iteration (s): 0.31 | learning rate: 1.941E-04 | global batch size: 256 | lm loss: 2.973572E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.371 | TFLOPs: 28.54 | +7: iteration 2690/ 21553 | consumed samples: 688640 | consumed tokens: 1410334720 | elapsed time per iteration (s): 0.31 | learning rate: 1.941E-04 | global batch size: 256 | lm loss: 2.999112E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.628 | TFLOPs: 28.80 | +7: iteration 2700/ 21553 | consumed samples: 691200 | consumed tokens: 1415577600 | elapsed time per iteration (s): 0.31 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 3.011451E+00 | grad norm: 0.490 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.190 | TFLOPs: 28.89 | +7: iteration 2710/ 21553 | consumed samples: 693760 | consumed tokens: 1420820480 | elapsed time per iteration (s): 0.31 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 2.970536E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.300 | TFLOPs: 29.10 | +7: iteration 2720/ 21553 | consumed samples: 696320 | consumed tokens: 1426063360 | elapsed time per iteration (s): 0.31 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 3.031136E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.920 | TFLOPs: 28.46 | +7: iteration 2730/ 21553 | consumed samples: 698880 | consumed tokens: 1431306240 | elapsed time per iteration (s): 0.31 | learning rate: 1.939E-04 | global batch size: 256 | lm loss: 2.991223E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.131 | TFLOPs: 29.27 | +7: iteration 2740/ 21553 | consumed samples: 701440 | consumed tokens: 1436549120 | elapsed time per iteration (s): 0.31 | learning rate: 1.939E-04 | global batch size: 256 | lm loss: 2.965120E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.896 | TFLOPs: 29.09 | +7: iteration 2750/ 21553 | consumed samples: 704000 | consumed tokens: 1441792000 | elapsed time per iteration (s): 0.31 | learning rate: 1.938E-04 | global batch size: 256 | lm loss: 2.975767E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.385 | TFLOPs: 29.21 | +7: iteration 2760/ 21553 | consumed samples: 706560 | consumed tokens: 1447034880 | elapsed time per iteration (s): 0.31 | learning rate: 1.938E-04 | global batch size: 256 | lm loss: 2.989091E+00 | grad norm: 0.391 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.349 | TFLOPs: 28.79 | +7: iteration 2770/ 21553 | consumed samples: 709120 | consumed tokens: 1452277760 | elapsed time per iteration (s): 0.31 | learning rate: 1.937E-04 | global batch size: 256 | lm loss: 3.012546E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.040 | TFLOPs: 29.27 | +7: iteration 2780/ 21553 | consumed samples: 711680 | consumed tokens: 1457520640 | elapsed time per iteration (s): 0.31 | learning rate: 1.937E-04 | global batch size: 256 | lm loss: 2.999233E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.619 | TFLOPs: 28.97 | +7: iteration 2790/ 21553 | consumed samples: 714240 | consumed tokens: 1462763520 | elapsed time per iteration (s): 0.31 | learning rate: 1.936E-04 | global batch size: 256 | lm loss: 3.010411E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.146 | TFLOPs: 29.27 | +7: iteration 2800/ 21553 | consumed samples: 716800 | consumed tokens: 1468006400 | elapsed time per iteration (s): 0.31 | learning rate: 1.936E-04 | global batch size: 256 | lm loss: 3.036860E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.564 | TFLOPs: 29.32 | +7: iteration 2810/ 21553 | consumed samples: 719360 | consumed tokens: 1473249280 | elapsed time per iteration (s): 0.31 | learning rate: 1.935E-04 | global batch size: 256 | lm loss: 2.952808E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.238 | TFLOPs: 29.10 | +7: iteration 2820/ 21553 | consumed samples: 721920 | consumed tokens: 1478492160 | elapsed time per iteration (s): 0.31 | learning rate: 1.935E-04 | global batch size: 256 | lm loss: 3.030053E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.613 | TFLOPs: 29.18 | +7: iteration 2830/ 21553 | consumed samples: 724480 | consumed tokens: 1483735040 | elapsed time per iteration (s): 0.31 | learning rate: 1.934E-04 | global batch size: 256 | lm loss: 3.022050E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.819 | TFLOPs: 29.15 | +7: iteration 2840/ 21553 | consumed samples: 727040 | consumed tokens: 1488977920 | elapsed time per iteration (s): 0.30 | learning rate: 1.934E-04 | global batch size: 256 | lm loss: 2.977535E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.410 | TFLOPs: 29.56 | +7: iteration 2850/ 21553 | consumed samples: 729600 | consumed tokens: 1494220800 | elapsed time per iteration (s): 0.30 | learning rate: 1.933E-04 | global batch size: 256 | lm loss: 2.997456E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.026 | TFLOPs: 29.93 | +7: iteration 2860/ 21553 | consumed samples: 732160 | consumed tokens: 1499463680 | elapsed time per iteration (s): 0.31 | learning rate: 1.933E-04 | global batch size: 256 | lm loss: 2.992619E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.949 | TFLOPs: 29.16 | +7: iteration 2870/ 21553 | consumed samples: 734720 | consumed tokens: 1504706560 | elapsed time per iteration (s): 0.31 | learning rate: 1.932E-04 | global batch size: 256 | lm loss: 3.001000E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.830 | TFLOPs: 28.95 | +7: iteration 2880/ 21553 | consumed samples: 737280 | consumed tokens: 1509949440 | elapsed time per iteration (s): 0.30 | learning rate: 1.932E-04 | global batch size: 256 | lm loss: 2.988928E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.973 | TFLOPs: 29.93 | +7: iteration 2890/ 21553 | consumed samples: 739840 | consumed tokens: 1515192320 | elapsed time per iteration (s): 0.31 | learning rate: 1.931E-04 | global batch size: 256 | lm loss: 2.970795E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.961 | TFLOPs: 28.60 | +7: iteration 2900/ 21553 | consumed samples: 742400 | consumed tokens: 1520435200 | elapsed time per iteration (s): 0.31 | learning rate: 1.931E-04 | global batch size: 256 | lm loss: 2.973842E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.635 | TFLOPs: 28.97 | +7: iteration 2910/ 21553 | consumed samples: 744960 | consumed tokens: 1525678080 | elapsed time per iteration (s): 0.30 | learning rate: 1.930E-04 | global batch size: 256 | lm loss: 2.983010E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.325 | TFLOPs: 29.49 | +7: iteration 2920/ 21553 | consumed samples: 747520 | consumed tokens: 1530920960 | elapsed time per iteration (s): 0.31 | learning rate: 1.930E-04 | global batch size: 256 | lm loss: 2.941182E+00 | grad norm: 0.382 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.360 | TFLOPs: 29.35 | +7: iteration 2930/ 21553 | consumed samples: 750080 | consumed tokens: 1536163840 | elapsed time per iteration (s): 0.31 | learning rate: 1.929E-04 | global batch size: 256 | lm loss: 2.938729E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.409 | TFLOPs: 29.04 | +7: iteration 2940/ 21553 | consumed samples: 752640 | consumed tokens: 1541406720 | elapsed time per iteration (s): 0.30 | learning rate: 1.929E-04 | global batch size: 256 | lm loss: 2.950155E+00 | grad norm: 0.388 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.019 | TFLOPs: 29.48 | +7: iteration 2950/ 21553 | consumed samples: 755200 | consumed tokens: 1546649600 | elapsed time per iteration (s): 0.31 | learning rate: 1.928E-04 | global batch size: 256 | lm loss: 2.943657E+00 | grad norm: 0.395 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.322 | TFLOPs: 29.38 | +7: iteration 2960/ 21553 | consumed samples: 757760 | consumed tokens: 1551892480 | elapsed time per iteration (s): 0.31 | learning rate: 1.928E-04 | global batch size: 256 | lm loss: 2.996088E+00 | grad norm: 0.380 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.125 | TFLOPs: 28.89 | +7: iteration 2970/ 21553 | consumed samples: 760320 | consumed tokens: 1557135360 | elapsed time per iteration (s): 0.31 | learning rate: 1.927E-04 | global batch size: 256 | lm loss: 3.006113E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.344 | TFLOPs: 29.03 | +7: iteration 2980/ 21553 | consumed samples: 762880 | consumed tokens: 1562378240 | elapsed time per iteration (s): 0.30 | learning rate: 1.926E-04 | global batch size: 256 | lm loss: 2.979245E+00 | grad norm: 0.378 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.585 | TFLOPs: 29.74 | +7: iteration 2990/ 21553 | consumed samples: 765440 | consumed tokens: 1567621120 | elapsed time per iteration (s): 0.30 | learning rate: 1.926E-04 | global batch size: 256 | lm loss: 2.967761E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.339 | TFLOPs: 29.42 | +7: iteration 3000/ 21553 | consumed samples: 768000 | consumed tokens: 1572864000 | elapsed time per iteration (s): 0.30 | learning rate: 1.925E-04 | global batch size: 256 | lm loss: 2.968310E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.593 | TFLOPs: 29.74 | +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 3000 | lm loss value: 2.899935E+00 | lm loss PPL: 1.817297E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 3000 to checkpoints_146m +0: [2022-11-25 17:48:03,763] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step3000 is begin to save! +0: [2022-11-25 17:48:03,768] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_01-model_00-model_states.pt... +0: [2022-11-25 17:48:03,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_01-model_00-model_states.pt. +0: [2022-11-25 17:48:03,852] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_03-model_00-model_states.pt... +0: [2022-11-25 17:48:03,867] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_03-model_00-model_states.pt. +0: [2022-11-25 17:48:03,868] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_04-model_00-model_states.pt... +0: [2022-11-25 17:48:03,882] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_04-model_00-model_states.pt. +0: [2022-11-25 17:48:03,883] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_05-model_00-model_states.pt... +0: [2022-11-25 17:48:03,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_05-model_00-model_states.pt. +0: [2022-11-25 17:48:03,897] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_06-model_00-model_states.pt... +0: [2022-11-25 17:48:03,912] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_06-model_00-model_states.pt. +0: [2022-11-25 17:48:03,912] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_07-model_00-model_states.pt... +0: [2022-11-25 17:48:03,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_07-model_00-model_states.pt. +0: [2022-11-25 17:48:03,927] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_08-model_00-model_states.pt... +0: [2022-11-25 17:48:03,941] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_08-model_00-model_states.pt. +0: [2022-11-25 17:48:03,942] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_09-model_00-model_states.pt... +0: [2022-11-25 17:48:03,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_09-model_00-model_states.pt. +0: [2022-11-25 17:48:03,956] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_10-model_00-model_states.pt... +0: [2022-11-25 17:48:03,971] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_10-model_00-model_states.pt. +0: [2022-11-25 17:48:03,971] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_11-model_00-model_states.pt... +0: [2022-11-25 17:48:03,985] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_11-model_00-model_states.pt. +0: [2022-11-25 17:48:03,986] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_12-model_00-model_states.pt... +0: [2022-11-25 17:48:04,000] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_12-model_00-model_states.pt. +0: [2022-11-25 17:48:04,000] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_13-model_00-model_states.pt... +0: [2022-11-25 17:48:04,015] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_13-model_00-model_states.pt. +0: [2022-11-25 17:48:04,015] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_14-model_00-model_states.pt... +0: [2022-11-25 17:48:04,029] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_14-model_00-model_states.pt. +0: [2022-11-25 17:48:04,030] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_15-model_00-model_states.pt... +0: [2022-11-25 17:48:04,044] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_15-model_00-model_states.pt. +0: [2022-11-25 17:48:04,044] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_16-model_00-model_states.pt... +0: [2022-11-25 17:48:04,059] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_16-model_00-model_states.pt. +0: [2022-11-25 17:48:04,059] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_17-model_00-model_states.pt... +0: [2022-11-25 17:48:04,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_17-model_00-model_states.pt. +0: [2022-11-25 17:48:04,074] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/layer_19-model_00-model_states.pt... +0: [2022-11-25 17:48:04,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/layer_19-model_00-model_states.pt. +0: [2022-11-25 17:48:04,076] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step3000/mp_rank_00_model_states.pt +0: [2022-11-25 17:48:04,076] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/mp_rank_00_model_states.pt... +0: [2022-11-25 17:48:04,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/mp_rank_00_model_states.pt. +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:48:04,095] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:48:04,127] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,127] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,127] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,127] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,127] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,127] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,128] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,128] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,128] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,130] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,130] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,131] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,131] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,135] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,135] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,135] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,130] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,129] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,130] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,129] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,131] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,131] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,131] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,132] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,132] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,134] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,135] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,133] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,134] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,133] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,136] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,136] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,136] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,136] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,136] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,136] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,137] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,137] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,137] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,137] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,139] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,139] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,138] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:48:04,139] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,139] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2022-11-25 17:48:04,139] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2022-11-25 17:48:04,141] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:48:04,142] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 17:48:04,142] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2022-11-25 17:48:04,143] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:48:04,143] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 17:48:04,143] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2022-11-25 17:48:04,145] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:48:04,145] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 17:48:04,145] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2022-11-25 17:48:04,158] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 17:48:04,158] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2022-11-25 17:48:04,164] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:48:04,164] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 17:48:04,164] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: successfully saved checkpoint at iteration 3000 to checkpoints_146m +7: time (ms) | save-checkpoint: 405.37 +7: iteration 3010/ 21553 | consumed samples: 770560 | consumed tokens: 1578106880 | elapsed time per iteration (s): 0.35 | learning rate: 1.925E-04 | global batch size: 256 | lm loss: 2.950051E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 730.136 | TFLOPs: 25.56 | +7: iteration 3020/ 21553 | consumed samples: 773120 | consumed tokens: 1583349760 | elapsed time per iteration (s): 0.30 | learning rate: 1.924E-04 | global batch size: 256 | lm loss: 2.969198E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.804 | TFLOPs: 29.47 | +7: iteration 3030/ 21553 | consumed samples: 775680 | consumed tokens: 1588592640 | elapsed time per iteration (s): 0.30 | learning rate: 1.924E-04 | global batch size: 256 | lm loss: 2.921142E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.519 | TFLOPs: 29.56 | +7: iteration 3040/ 21553 | consumed samples: 778240 | consumed tokens: 1593835520 | elapsed time per iteration (s): 0.31 | learning rate: 1.923E-04 | global batch size: 256 | lm loss: 2.963983E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.559 | TFLOPs: 28.55 | +7: iteration 3050/ 21553 | consumed samples: 780800 | consumed tokens: 1599078400 | elapsed time per iteration (s): 0.31 | learning rate: 1.923E-04 | global batch size: 256 | lm loss: 2.975103E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.346 | TFLOPs: 28.93 | +7: iteration 3060/ 21553 | consumed samples: 783360 | consumed tokens: 1604321280 | elapsed time per iteration (s): 0.31 | learning rate: 1.922E-04 | global batch size: 256 | lm loss: 2.990630E+00 | grad norm: 0.397 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.661 | TFLOPs: 28.52 | +7: iteration 3070/ 21553 | consumed samples: 785920 | consumed tokens: 1609564160 | elapsed time per iteration (s): 0.31 | learning rate: 1.922E-04 | global batch size: 256 | lm loss: 2.993185E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.470 | TFLOPs: 29.04 | +7: iteration 3080/ 21553 | consumed samples: 788480 | consumed tokens: 1614807040 | elapsed time per iteration (s): 0.30 | learning rate: 1.921E-04 | global batch size: 256 | lm loss: 2.956224E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.041 | TFLOPs: 30.14 | +7: iteration 3090/ 21553 | consumed samples: 791040 | consumed tokens: 1620049920 | elapsed time per iteration (s): 0.33 | learning rate: 1.921E-04 | global batch size: 256 | lm loss: 2.957062E+00 | grad norm: 0.616 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 775.357 | TFLOPs: 27.14 | +7: iteration 3100/ 21553 | consumed samples: 793600 | consumed tokens: 1625292800 | elapsed time per iteration (s): 0.30 | learning rate: 1.920E-04 | global batch size: 256 | lm loss: 2.989873E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.447 | TFLOPs: 29.67 | +7: iteration 3110/ 21553 | consumed samples: 796160 | consumed tokens: 1630535680 | elapsed time per iteration (s): 0.31 | learning rate: 1.920E-04 | global batch size: 256 | lm loss: 2.921026E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.624 | TFLOPs: 28.76 | +7: iteration 3120/ 21553 | consumed samples: 798720 | consumed tokens: 1635778560 | elapsed time per iteration (s): 0.31 | learning rate: 1.919E-04 | global batch size: 256 | lm loss: 2.964222E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.123 | TFLOPs: 28.50 | +7: iteration 3130/ 21553 | consumed samples: 801280 | consumed tokens: 1641021440 | elapsed time per iteration (s): 0.30 | learning rate: 1.918E-04 | global batch size: 256 | lm loss: 2.959863E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.147 | TFLOPs: 29.66 | +7: iteration 3140/ 21553 | consumed samples: 803840 | consumed tokens: 1646264320 | elapsed time per iteration (s): 0.30 | learning rate: 1.918E-04 | global batch size: 256 | lm loss: 2.925544E+00 | grad norm: 0.378 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.485 | TFLOPs: 29.46 | +7: iteration 3150/ 21553 | consumed samples: 806400 | consumed tokens: 1651507200 | elapsed time per iteration (s): 0.32 | learning rate: 1.917E-04 | global batch size: 256 | lm loss: 2.968598E+00 | grad norm: 0.376 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 790.496 | TFLOPs: 27.67 | +7: iteration 3160/ 21553 | consumed samples: 808960 | consumed tokens: 1656750080 | elapsed time per iteration (s): 0.33 | learning rate: 1.917E-04 | global batch size: 256 | lm loss: 2.919261E+00 | grad norm: 0.373 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 776.757 | TFLOPs: 27.19 | +7: iteration 3170/ 21553 | consumed samples: 811520 | consumed tokens: 1661992960 | elapsed time per iteration (s): 0.31 | learning rate: 1.916E-04 | global batch size: 256 | lm loss: 2.933940E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.603 | TFLOPs: 29.32 | +7: iteration 3180/ 21553 | consumed samples: 814080 | consumed tokens: 1667235840 | elapsed time per iteration (s): 0.31 | learning rate: 1.916E-04 | global batch size: 256 | lm loss: 2.954168E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.509 | TFLOPs: 28.79 | +7: iteration 3190/ 21553 | consumed samples: 816640 | consumed tokens: 1672478720 | elapsed time per iteration (s): 0.31 | learning rate: 1.915E-04 | global batch size: 256 | lm loss: 2.908175E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.808 | TFLOPs: 29.33 | +7: iteration 3200/ 21553 | consumed samples: 819200 | consumed tokens: 1677721600 | elapsed time per iteration (s): 0.31 | learning rate: 1.915E-04 | global batch size: 256 | lm loss: 2.916852E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.196 | TFLOPs: 28.99 | +7: iteration 3210/ 21553 | consumed samples: 821760 | consumed tokens: 1682964480 | elapsed time per iteration (s): 0.31 | learning rate: 1.914E-04 | global batch size: 256 | lm loss: 2.966151E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.573 | TFLOPs: 28.69 | +7: iteration 3220/ 21553 | consumed samples: 824320 | consumed tokens: 1688207360 | elapsed time per iteration (s): 0.31 | learning rate: 1.913E-04 | global batch size: 256 | lm loss: 2.925952E+00 | grad norm: 0.379 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.582 | TFLOPs: 29.11 | +7: iteration 3230/ 21553 | consumed samples: 826880 | consumed tokens: 1693450240 | elapsed time per iteration (s): 0.31 | learning rate: 1.913E-04 | global batch size: 256 | lm loss: 2.951740E+00 | grad norm: 0.360 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.731 | TFLOPs: 28.94 | +7: iteration 3240/ 21553 | consumed samples: 829440 | consumed tokens: 1698693120 | elapsed time per iteration (s): 0.31 | learning rate: 1.912E-04 | global batch size: 256 | lm loss: 2.923023E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.945 | TFLOPs: 29.19 | +7: iteration 3250/ 21553 | consumed samples: 832000 | consumed tokens: 1703936000 | elapsed time per iteration (s): 0.31 | learning rate: 1.912E-04 | global batch size: 256 | lm loss: 2.909302E+00 | grad norm: 1.888 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.718 | TFLOPs: 28.87 | +7: iteration 3260/ 21553 | consumed samples: 834560 | consumed tokens: 1709178880 | elapsed time per iteration (s): 0.31 | learning rate: 1.911E-04 | global batch size: 256 | lm loss: 2.945989E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.436 | TFLOPs: 29.25 | +7: iteration 3270/ 21553 | consumed samples: 837120 | consumed tokens: 1714421760 | elapsed time per iteration (s): 0.30 | learning rate: 1.911E-04 | global batch size: 256 | lm loss: 2.947116E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.182 | TFLOPs: 29.62 | +7: iteration 3280/ 21553 | consumed samples: 839680 | consumed tokens: 1719664640 | elapsed time per iteration (s): 0.30 | learning rate: 1.910E-04 | global batch size: 256 | lm loss: 2.928703E+00 | grad norm: 0.367 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.948 | TFLOPs: 29.82 | +7: iteration 3290/ 21553 | consumed samples: 842240 | consumed tokens: 1724907520 | elapsed time per iteration (s): 0.30 | learning rate: 1.909E-04 | global batch size: 256 | lm loss: 2.897358E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.747 | TFLOPs: 29.54 | +7: iteration 3300/ 21553 | consumed samples: 844800 | consumed tokens: 1730150400 | elapsed time per iteration (s): 0.31 | learning rate: 1.909E-04 | global batch size: 256 | lm loss: 2.954006E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.987 | TFLOPs: 29.34 | +7: iteration 3310/ 21553 | consumed samples: 847360 | consumed tokens: 1735393280 | elapsed time per iteration (s): 0.31 | learning rate: 1.908E-04 | global batch size: 256 | lm loss: 2.933070E+00 | grad norm: 0.383 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.794 | TFLOPs: 29.29 | +7: iteration 3320/ 21553 | consumed samples: 849920 | consumed tokens: 1740636160 | elapsed time per iteration (s): 0.30 | learning rate: 1.908E-04 | global batch size: 256 | lm loss: 2.948522E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.039 | TFLOPs: 29.44 | +7: iteration 3330/ 21553 | consumed samples: 852480 | consumed tokens: 1745879040 | elapsed time per iteration (s): 0.31 | learning rate: 1.907E-04 | global batch size: 256 | lm loss: 2.961921E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.128 | TFLOPs: 28.92 | +7: iteration 3340/ 21553 | consumed samples: 855040 | consumed tokens: 1751121920 | elapsed time per iteration (s): 0.31 | learning rate: 1.906E-04 | global batch size: 256 | lm loss: 2.953861E+00 | grad norm: 0.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.638 | TFLOPs: 29.11 | +7: iteration 3350/ 21553 | consumed samples: 857600 | consumed tokens: 1756364800 | elapsed time per iteration (s): 0.30 | learning rate: 1.906E-04 | global batch size: 256 | lm loss: 2.940591E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.198 | TFLOPs: 29.45 | +7: iteration 3360/ 21553 | consumed samples: 860160 | consumed tokens: 1761607680 | elapsed time per iteration (s): 0.31 | learning rate: 1.905E-04 | global batch size: 256 | lm loss: 2.930783E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.934 | TFLOPs: 28.77 | +7: iteration 3370/ 21553 | consumed samples: 862720 | consumed tokens: 1766850560 | elapsed time per iteration (s): 0.31 | learning rate: 1.905E-04 | global batch size: 256 | lm loss: 2.951601E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.979 | TFLOPs: 29.16 | +7: iteration 3380/ 21553 | consumed samples: 865280 | consumed tokens: 1772093440 | elapsed time per iteration (s): 0.30 | learning rate: 1.904E-04 | global batch size: 256 | lm loss: 2.930437E+00 | grad norm: 0.366 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.342 | TFLOPs: 29.80 | +7: iteration 3390/ 21553 | consumed samples: 867840 | consumed tokens: 1777336320 | elapsed time per iteration (s): 0.30 | learning rate: 1.903E-04 | global batch size: 256 | lm loss: 2.926983E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.276 | TFLOPs: 29.66 | +7: iteration 3400/ 21553 | consumed samples: 870400 | consumed tokens: 1782579200 | elapsed time per iteration (s): 0.30 | learning rate: 1.903E-04 | global batch size: 256 | lm loss: 2.899094E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.551 | TFLOPs: 29.39 | +7: iteration 3410/ 21553 | consumed samples: 872960 | consumed tokens: 1787822080 | elapsed time per iteration (s): 0.30 | learning rate: 1.902E-04 | global batch size: 256 | lm loss: 2.962174E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.029 | TFLOPs: 29.51 | +7: iteration 3420/ 21553 | consumed samples: 875520 | consumed tokens: 1793064960 | elapsed time per iteration (s): 0.31 | learning rate: 1.902E-04 | global batch size: 256 | lm loss: 2.947670E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.377 | TFLOPs: 29.03 | +7: iteration 3430/ 21553 | consumed samples: 878080 | consumed tokens: 1798307840 | elapsed time per iteration (s): 0.31 | learning rate: 1.901E-04 | global batch size: 256 | lm loss: 2.901071E+00 | grad norm: 0.384 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.728 | TFLOPs: 28.98 | +7: iteration 3440/ 21553 | consumed samples: 880640 | consumed tokens: 1803550720 | elapsed time per iteration (s): 0.31 | learning rate: 1.900E-04 | global batch size: 256 | lm loss: 2.873376E+00 | grad norm: 0.391 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.774 | TFLOPs: 29.33 | +7: iteration 3450/ 21553 | consumed samples: 883200 | consumed tokens: 1808793600 | elapsed time per iteration (s): 0.31 | learning rate: 1.900E-04 | global batch size: 256 | lm loss: 2.943036E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.367 | TFLOPs: 29.21 | +7: iteration 3460/ 21553 | consumed samples: 885760 | consumed tokens: 1814036480 | elapsed time per iteration (s): 0.30 | learning rate: 1.899E-04 | global batch size: 256 | lm loss: 2.910817E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.480 | TFLOPs: 29.49 | +7: iteration 3470/ 21553 | consumed samples: 888320 | consumed tokens: 1819279360 | elapsed time per iteration (s): 0.30 | learning rate: 1.899E-04 | global batch size: 256 | lm loss: 2.940544E+00 | grad norm: 0.395 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.411 | TFLOPs: 29.49 | +7: iteration 3480/ 21553 | consumed samples: 890880 | consumed tokens: 1824522240 | elapsed time per iteration (s): 0.31 | learning rate: 1.898E-04 | global batch size: 256 | lm loss: 2.937158E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.407 | TFLOPs: 29.04 | +7: iteration 3490/ 21553 | consumed samples: 893440 | consumed tokens: 1829765120 | elapsed time per iteration (s): 0.30 | learning rate: 1.897E-04 | global batch size: 256 | lm loss: 2.905516E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.288 | TFLOPs: 29.73 | +7: iteration 3500/ 21553 | consumed samples: 896000 | consumed tokens: 1835008000 | elapsed time per iteration (s): 0.30 | learning rate: 1.897E-04 | global batch size: 256 | lm loss: 2.903256E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.981 | TFLOPs: 29.48 | +7: iteration 3510/ 21553 | consumed samples: 898560 | consumed tokens: 1840250880 | elapsed time per iteration (s): 0.31 | learning rate: 1.896E-04 | global batch size: 256 | lm loss: 2.916640E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.343 | TFLOPs: 28.96 | +7: iteration 3520/ 21553 | consumed samples: 901120 | consumed tokens: 1845493760 | elapsed time per iteration (s): 0.31 | learning rate: 1.896E-04 | global batch size: 256 | lm loss: 2.891315E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.411 | TFLOPs: 29.32 | +7: iteration 3530/ 21553 | consumed samples: 903680 | consumed tokens: 1850736640 | elapsed time per iteration (s): 0.31 | learning rate: 1.895E-04 | global batch size: 256 | lm loss: 2.906446E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.710 | TFLOPs: 29.22 | +7: iteration 3540/ 21553 | consumed samples: 906240 | consumed tokens: 1855979520 | elapsed time per iteration (s): 0.31 | learning rate: 1.894E-04 | global batch size: 256 | lm loss: 2.934390E+00 | grad norm: 0.549 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.350 | TFLOPs: 29.31 | +7: iteration 3550/ 21553 | consumed samples: 908800 | consumed tokens: 1861222400 | elapsed time per iteration (s): 0.30 | learning rate: 1.894E-04 | global batch size: 256 | lm loss: 2.905687E+00 | grad norm: 0.391 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.033 | TFLOPs: 29.69 | +7: iteration 3560/ 21553 | consumed samples: 911360 | consumed tokens: 1866465280 | elapsed time per iteration (s): 0.31 | learning rate: 1.893E-04 | global batch size: 256 | lm loss: 2.927895E+00 | grad norm: 0.382 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.411 | TFLOPs: 28.62 | +7: iteration 3570/ 21553 | consumed samples: 913920 | consumed tokens: 1871708160 | elapsed time per iteration (s): 0.31 | learning rate: 1.892E-04 | global batch size: 256 | lm loss: 2.910737E+00 | grad norm: 0.351 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.130 | TFLOPs: 28.78 | +7: iteration 3580/ 21553 | consumed samples: 916480 | consumed tokens: 1876951040 | elapsed time per iteration (s): 0.30 | learning rate: 1.892E-04 | global batch size: 256 | lm loss: 2.924767E+00 | grad norm: 0.371 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.028 | TFLOPs: 29.41 | +7: iteration 3590/ 21553 | consumed samples: 919040 | consumed tokens: 1882193920 | elapsed time per iteration (s): 0.31 | learning rate: 1.891E-04 | global batch size: 256 | lm loss: 2.880490E+00 | grad norm: 0.356 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.199 | TFLOPs: 28.96 | +7: iteration 3600/ 21553 | consumed samples: 921600 | consumed tokens: 1887436800 | elapsed time per iteration (s): 0.31 | learning rate: 1.891E-04 | global batch size: 256 | lm loss: 2.898811E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.395 | TFLOPs: 29.31 | +7: iteration 3610/ 21553 | consumed samples: 924160 | consumed tokens: 1892679680 | elapsed time per iteration (s): 0.30 | learning rate: 1.890E-04 | global batch size: 256 | lm loss: 2.921773E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.730 | TFLOPs: 29.75 | +7: iteration 3620/ 21553 | consumed samples: 926720 | consumed tokens: 1897922560 | elapsed time per iteration (s): 0.31 | learning rate: 1.889E-04 | global batch size: 256 | lm loss: 2.887211E+00 | grad norm: 0.381 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.604 | TFLOPs: 28.94 | +7: iteration 3630/ 21553 | consumed samples: 929280 | consumed tokens: 1903165440 | elapsed time per iteration (s): 0.31 | learning rate: 1.889E-04 | global batch size: 256 | lm loss: 2.917323E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.820 | TFLOPs: 28.70 | +7: iteration 3640/ 21553 | consumed samples: 931840 | consumed tokens: 1908408320 | elapsed time per iteration (s): 0.32 | learning rate: 1.888E-04 | global batch size: 256 | lm loss: 2.927448E+00 | grad norm: 0.381 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.164 | TFLOPs: 28.40 | +7: iteration 3650/ 21553 | consumed samples: 934400 | consumed tokens: 1913651200 | elapsed time per iteration (s): 0.32 | learning rate: 1.887E-04 | global batch size: 256 | lm loss: 2.886488E+00 | grad norm: 0.370 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.480 | TFLOPs: 27.64 | +7: iteration 3660/ 21553 | consumed samples: 936960 | consumed tokens: 1918894080 | elapsed time per iteration (s): 0.31 | learning rate: 1.887E-04 | global batch size: 256 | lm loss: 2.890004E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.130 | TFLOPs: 29.03 | +7: iteration 3670/ 21553 | consumed samples: 939520 | consumed tokens: 1924136960 | elapsed time per iteration (s): 0.30 | learning rate: 1.886E-04 | global batch size: 256 | lm loss: 2.885680E+00 | grad norm: 0.392 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.899 | TFLOPs: 29.75 | +7: iteration 3680/ 21553 | consumed samples: 942080 | consumed tokens: 1929379840 | elapsed time per iteration (s): 0.31 | learning rate: 1.885E-04 | global batch size: 256 | lm loss: 2.920539E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.792 | TFLOPs: 29.15 | +7: iteration 3690/ 21553 | consumed samples: 944640 | consumed tokens: 1934622720 | elapsed time per iteration (s): 0.30 | learning rate: 1.885E-04 | global batch size: 256 | lm loss: 2.916620E+00 | grad norm: 0.359 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.135 | TFLOPs: 29.62 | +7: iteration 3700/ 21553 | consumed samples: 947200 | consumed tokens: 1939865600 | elapsed time per iteration (s): 0.30 | learning rate: 1.884E-04 | global batch size: 256 | lm loss: 2.890701E+00 | grad norm: 0.355 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.063 | TFLOPs: 29.90 | +7: iteration 3710/ 21553 | consumed samples: 949760 | consumed tokens: 1945108480 | elapsed time per iteration (s): 0.30 | learning rate: 1.883E-04 | global batch size: 256 | lm loss: 2.901863E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.322 | TFLOPs: 29.42 | +7: iteration 3720/ 21553 | consumed samples: 952320 | consumed tokens: 1950351360 | elapsed time per iteration (s): 0.31 | learning rate: 1.883E-04 | global batch size: 256 | lm loss: 2.911006E+00 | grad norm: 0.385 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.445 | TFLOPs: 28.79 | +7: iteration 3730/ 21553 | consumed samples: 954880 | consumed tokens: 1955594240 | elapsed time per iteration (s): 0.31 | learning rate: 1.882E-04 | global batch size: 256 | lm loss: 2.901950E+00 | grad norm: 0.383 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.559 | TFLOPs: 29.15 | +7: iteration 3740/ 21553 | consumed samples: 957440 | consumed tokens: 1960837120 | elapsed time per iteration (s): 0.31 | learning rate: 1.882E-04 | global batch size: 256 | lm loss: 2.882027E+00 | grad norm: 0.372 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.856 | TFLOPs: 29.30 | +7: iteration 3750/ 21553 | consumed samples: 960000 | consumed tokens: 1966080000 | elapsed time per iteration (s): 0.31 | learning rate: 1.881E-04 | global batch size: 256 | lm loss: 2.918670E+00 | grad norm: 0.373 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.812 | TFLOPs: 28.73 | +7: iteration 3760/ 21553 | consumed samples: 962560 | consumed tokens: 1971322880 | elapsed time per iteration (s): 0.31 | learning rate: 1.880E-04 | global batch size: 256 | lm loss: 2.865446E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.975 | TFLOPs: 28.88 | +7: iteration 3770/ 21553 | consumed samples: 965120 | consumed tokens: 1976565760 | elapsed time per iteration (s): 0.30 | learning rate: 1.880E-04 | global batch size: 256 | lm loss: 2.892307E+00 | grad norm: 0.382 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.820 | TFLOPs: 29.50 | +7: iteration 3780/ 21553 | consumed samples: 967680 | consumed tokens: 1981808640 | elapsed time per iteration (s): 0.30 | learning rate: 1.879E-04 | global batch size: 256 | lm loss: 2.889087E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.509 | TFLOPs: 30.02 | +7: iteration 3790/ 21553 | consumed samples: 970240 | consumed tokens: 1987051520 | elapsed time per iteration (s): 0.31 | learning rate: 1.878E-04 | global batch size: 256 | lm loss: 2.899759E+00 | grad norm: 0.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.298 | TFLOPs: 29.24 | +7: iteration 3800/ 21553 | consumed samples: 972800 | consumed tokens: 1992294400 | elapsed time per iteration (s): 0.31 | learning rate: 1.878E-04 | global batch size: 256 | lm loss: 2.899917E+00 | grad norm: 0.367 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.986 | TFLOPs: 29.37 | +7: iteration 3810/ 21553 | consumed samples: 975360 | consumed tokens: 1997537280 | elapsed time per iteration (s): 0.31 | learning rate: 1.877E-04 | global batch size: 256 | lm loss: 2.888702E+00 | grad norm: 0.381 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.943 | TFLOPs: 29.02 | +7: iteration 3820/ 21553 | consumed samples: 977920 | consumed tokens: 2002780160 | elapsed time per iteration (s): 0.31 | learning rate: 1.876E-04 | global batch size: 256 | lm loss: 2.902851E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.552 | TFLOPs: 28.97 | +7: iteration 3830/ 21553 | consumed samples: 980480 | consumed tokens: 2008023040 | elapsed time per iteration (s): 0.32 | learning rate: 1.876E-04 | global batch size: 256 | lm loss: 2.918756E+00 | grad norm: 0.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.194 | TFLOPs: 28.40 | +7: iteration 3840/ 21553 | consumed samples: 983040 | consumed tokens: 2013265920 | elapsed time per iteration (s): 0.30 | learning rate: 1.875E-04 | global batch size: 256 | lm loss: 2.884877E+00 | grad norm: 0.391 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.016 | TFLOPs: 29.79 | +7: iteration 3850/ 21553 | consumed samples: 985600 | consumed tokens: 2018508800 | elapsed time per iteration (s): 0.31 | learning rate: 1.874E-04 | global batch size: 256 | lm loss: 2.856776E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.731 | TFLOPs: 28.73 | +7: iteration 3860/ 21553 | consumed samples: 988160 | consumed tokens: 2023751680 | elapsed time per iteration (s): 0.31 | learning rate: 1.874E-04 | global batch size: 256 | lm loss: 2.878135E+00 | grad norm: 0.388 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.410 | TFLOPs: 29.25 | +7: iteration 3870/ 21553 | consumed samples: 990720 | consumed tokens: 2028994560 | elapsed time per iteration (s): 0.30 | learning rate: 1.873E-04 | global batch size: 256 | lm loss: 2.845122E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.718 | TFLOPs: 29.75 | +7: iteration 3880/ 21553 | consumed samples: 993280 | consumed tokens: 2034237440 | elapsed time per iteration (s): 0.30 | learning rate: 1.872E-04 | global batch size: 256 | lm loss: 2.900443E+00 | grad norm: 0.385 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.433 | TFLOPs: 29.53 | +7: iteration 3890/ 21553 | consumed samples: 995840 | consumed tokens: 2039480320 | elapsed time per iteration (s): 0.30 | learning rate: 1.871E-04 | global batch size: 256 | lm loss: 2.902424E+00 | grad norm: 0.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.290 | TFLOPs: 29.77 | +7: iteration 3900/ 21553 | consumed samples: 998400 | consumed tokens: 2044723200 | elapsed time per iteration (s): 0.31 | learning rate: 1.871E-04 | global batch size: 256 | lm loss: 2.863014E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.127 | TFLOPs: 28.89 | +7: iteration 3910/ 21553 | consumed samples: 1000960 | consumed tokens: 2049966080 | elapsed time per iteration (s): 0.32 | learning rate: 1.870E-04 | global batch size: 256 | lm loss: 2.884337E+00 | grad norm: 0.379 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.106 | TFLOPs: 28.36 | +7: iteration 3920/ 21553 | consumed samples: 1003520 | consumed tokens: 2055208960 | elapsed time per iteration (s): 0.30 | learning rate: 1.869E-04 | global batch size: 256 | lm loss: 2.860880E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.607 | TFLOPs: 29.92 | +7: iteration 3930/ 21553 | consumed samples: 1006080 | consumed tokens: 2060451840 | elapsed time per iteration (s): 0.30 | learning rate: 1.869E-04 | global batch size: 256 | lm loss: 2.908115E+00 | grad norm: 0.383 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.438 | TFLOPs: 29.88 | +7: iteration 3940/ 21553 | consumed samples: 1008640 | consumed tokens: 2065694720 | elapsed time per iteration (s): 0.30 | learning rate: 1.868E-04 | global batch size: 256 | lm loss: 2.841665E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.566 | TFLOPs: 29.46 | +7: iteration 3950/ 21553 | consumed samples: 1011200 | consumed tokens: 2070937600 | elapsed time per iteration (s): 0.30 | learning rate: 1.867E-04 | global batch size: 256 | lm loss: 2.868883E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.458 | TFLOPs: 29.74 | +7: iteration 3960/ 21553 | consumed samples: 1013760 | consumed tokens: 2076180480 | elapsed time per iteration (s): 0.30 | learning rate: 1.867E-04 | global batch size: 256 | lm loss: 2.842285E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.624 | TFLOPs: 29.95 | +7: iteration 3970/ 21553 | consumed samples: 1016320 | consumed tokens: 2081423360 | elapsed time per iteration (s): 0.31 | learning rate: 1.866E-04 | global batch size: 256 | lm loss: 2.904199E+00 | grad norm: 0.384 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.712 | TFLOPs: 29.15 | +7: iteration 3980/ 21553 | consumed samples: 1018880 | consumed tokens: 2086666240 | elapsed time per iteration (s): 0.30 | learning rate: 1.865E-04 | global batch size: 256 | lm loss: 2.879470E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.447 | TFLOPs: 29.39 | +7: iteration 3990/ 21553 | consumed samples: 1021440 | consumed tokens: 2091909120 | elapsed time per iteration (s): 0.31 | learning rate: 1.865E-04 | global batch size: 256 | lm loss: 2.895018E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.202 | TFLOPs: 29.38 | +0: [2022-11-25 17:53:11,275] [INFO] [logging.py:68:log_dist] [Rank 0] step=4000, skipped=0, lr=[0.00018638652011758862, 0.00018638652011758862, 0.00018638652011758862], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 4000/ 21553 | consumed samples: 1024000 | consumed tokens: 2097152000 | elapsed time per iteration (s): 0.30 | learning rate: 1.864E-04 | global batch size: 256 | lm loss: 2.850764E+00 | grad norm: 0.340 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.273 | TFLOPs: 29.45 | +0: steps: 4000 loss: 2.8028 iter time (s): 0.305 samples/sec: 837.979 +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 4000 | lm loss value: 2.918512E+00 | lm loss PPL: 1.851371E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 4000 to checkpoints_146m +0: [2022-11-25 17:53:11,430] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step4000 is begin to save! +0: [2022-11-25 17:53:11,434] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_01-model_00-model_states.pt... +0: [2022-11-25 17:53:11,555] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_01-model_00-model_states.pt. +0: [2022-11-25 17:53:11,556] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_03-model_00-model_states.pt... +0: [2022-11-25 17:53:11,578] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_03-model_00-model_states.pt. +0: [2022-11-25 17:53:11,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_04-model_00-model_states.pt... +0: [2022-11-25 17:53:11,599] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_04-model_00-model_states.pt. +0: [2022-11-25 17:53:11,599] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_05-model_00-model_states.pt... +0: [2022-11-25 17:53:11,620] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_05-model_00-model_states.pt. +0: [2022-11-25 17:53:11,620] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_06-model_00-model_states.pt... +0: [2022-11-25 17:53:11,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_06-model_00-model_states.pt. +0: [2022-11-25 17:53:11,641] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_07-model_00-model_states.pt... +0: [2022-11-25 17:53:11,661] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_07-model_00-model_states.pt. +0: [2022-11-25 17:53:11,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_08-model_00-model_states.pt... +0: [2022-11-25 17:53:11,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_08-model_00-model_states.pt. +0: [2022-11-25 17:53:11,683] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_09-model_00-model_states.pt... +0: [2022-11-25 17:53:11,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_09-model_00-model_states.pt. +0: [2022-11-25 17:53:11,703] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_10-model_00-model_states.pt... +0: [2022-11-25 17:53:11,724] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_10-model_00-model_states.pt. +0: [2022-11-25 17:53:11,724] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_11-model_00-model_states.pt... +0: [2022-11-25 17:53:11,745] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_11-model_00-model_states.pt. +0: [2022-11-25 17:53:11,745] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_12-model_00-model_states.pt... +0: [2022-11-25 17:53:11,766] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_12-model_00-model_states.pt. +0: [2022-11-25 17:53:11,766] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_13-model_00-model_states.pt... +0: [2022-11-25 17:53:11,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_13-model_00-model_states.pt. +0: [2022-11-25 17:53:11,787] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_14-model_00-model_states.pt... +0: [2022-11-25 17:53:11,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_14-model_00-model_states.pt. +0: [2022-11-25 17:53:11,808] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_15-model_00-model_states.pt... +0: [2022-11-25 17:53:11,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_15-model_00-model_states.pt. +0: [2022-11-25 17:53:11,829] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_16-model_00-model_states.pt... +0: [2022-11-25 17:53:11,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_16-model_00-model_states.pt. +0: [2022-11-25 17:53:11,849] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_17-model_00-model_states.pt... +0: [2022-11-25 17:53:11,870] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_17-model_00-model_states.pt. +0: [2022-11-25 17:53:11,870] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/layer_19-model_00-model_states.pt... +0: [2022-11-25 17:53:11,871] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/layer_19-model_00-model_states.pt. +0: [2022-11-25 17:53:11,873] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step4000/mp_rank_00_model_states.pt +0: [2022-11-25 17:53:11,873] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/mp_rank_00_model_states.pt... +0: [2022-11-25 17:53:11,876] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/mp_rank_00_model_states.pt. +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,928] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,928] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,928] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,928] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,929] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,930] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,931] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,931] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,932] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,932] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,932] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,932] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:53:11,934] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,929] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,931] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,929] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,929] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,929] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,932] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,929] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,932] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,930] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,930] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,933] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,931] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,933] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,931] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,934] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,934] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,934] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,935] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,935] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,934] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:53:11,935] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,936] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +1: [2022-11-25 17:53:11,936] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,936] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,936] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,936] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,936] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,937] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,938] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:53:11,938] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,938] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,938] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,938] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,939] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,939] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:53:11,940] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,940] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,940] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,940] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,941] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,941] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,941] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:53:11,942] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,942] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,942] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 17:53:11,942] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,942] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2022-11-25 17:53:11,942] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2022-11-25 17:53:11,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 17:53:11,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,969] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,969] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,969] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2022-11-25 17:53:11,973] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:53:11,973] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 17:53:11,974] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: successfully saved checkpoint at iteration 4000 to checkpoints_146m +7: time (ms) | save-checkpoint: 548.64 +4: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,935] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,935] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:53:11,940] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 17:53:11,940] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: iteration 4010/ 21553 | consumed samples: 1026560 | consumed tokens: 2102394880 | elapsed time per iteration (s): 0.37 | learning rate: 1.863E-04 | global batch size: 256 | lm loss: 2.861959E+00 | grad norm: 0.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 695.228 | TFLOPs: 24.34 | +7: iteration 4020/ 21553 | consumed samples: 1029120 | consumed tokens: 2107637760 | elapsed time per iteration (s): 0.31 | learning rate: 1.862E-04 | global batch size: 256 | lm loss: 2.894744E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.102 | TFLOPs: 29.37 | +7: iteration 4030/ 21553 | consumed samples: 1031680 | consumed tokens: 2112880640 | elapsed time per iteration (s): 0.30 | learning rate: 1.862E-04 | global batch size: 256 | lm loss: 2.862086E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.516 | TFLOPs: 29.49 | +7: iteration 4040/ 21553 | consumed samples: 1034240 | consumed tokens: 2118123520 | elapsed time per iteration (s): 0.30 | learning rate: 1.861E-04 | global batch size: 256 | lm loss: 2.832657E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.010 | TFLOPs: 29.41 | +7: iteration 4050/ 21553 | consumed samples: 1036800 | consumed tokens: 2123366400 | elapsed time per iteration (s): 0.30 | learning rate: 1.860E-04 | global batch size: 256 | lm loss: 2.843352E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.513 | TFLOPs: 29.63 | +7: iteration 4060/ 21553 | consumed samples: 1039360 | consumed tokens: 2128609280 | elapsed time per iteration (s): 0.32 | learning rate: 1.860E-04 | global batch size: 256 | lm loss: 2.886456E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.396 | TFLOPs: 28.05 | +7: iteration 4070/ 21553 | consumed samples: 1041920 | consumed tokens: 2133852160 | elapsed time per iteration (s): 0.31 | learning rate: 1.859E-04 | global batch size: 256 | lm loss: 2.894281E+00 | grad norm: 0.381 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.679 | TFLOPs: 28.90 | +7: iteration 4080/ 21553 | consumed samples: 1044480 | consumed tokens: 2139095040 | elapsed time per iteration (s): 0.31 | learning rate: 1.858E-04 | global batch size: 256 | lm loss: 2.864436E+00 | grad norm: 0.378 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.518 | TFLOPs: 28.97 | +7: iteration 4090/ 21553 | consumed samples: 1047040 | consumed tokens: 2144337920 | elapsed time per iteration (s): 0.31 | learning rate: 1.857E-04 | global batch size: 256 | lm loss: 2.870896E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.802 | TFLOPs: 28.49 | +7: iteration 4100/ 21553 | consumed samples: 1049600 | consumed tokens: 2149580800 | elapsed time per iteration (s): 0.31 | learning rate: 1.857E-04 | global batch size: 256 | lm loss: 2.862360E+00 | grad norm: 0.373 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.663 | TFLOPs: 29.18 | +7: iteration 4110/ 21553 | consumed samples: 1052160 | consumed tokens: 2154823680 | elapsed time per iteration (s): 0.31 | learning rate: 1.856E-04 | global batch size: 256 | lm loss: 2.827076E+00 | grad norm: 0.367 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.000 | TFLOPs: 28.67 | +7: iteration 4120/ 21553 | consumed samples: 1054720 | consumed tokens: 2160066560 | elapsed time per iteration (s): 0.31 | learning rate: 1.855E-04 | global batch size: 256 | lm loss: 2.821296E+00 | grad norm: 0.368 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.541 | TFLOPs: 28.69 | +7: iteration 4130/ 21553 | consumed samples: 1057280 | consumed tokens: 2165309440 | elapsed time per iteration (s): 0.33 | learning rate: 1.855E-04 | global batch size: 256 | lm loss: 2.826405E+00 | grad norm: 0.360 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 778.481 | TFLOPs: 27.25 | +7: iteration 4140/ 21553 | consumed samples: 1059840 | consumed tokens: 2170552320 | elapsed time per iteration (s): 0.33 | learning rate: 1.854E-04 | global batch size: 256 | lm loss: 2.846785E+00 | grad norm: 0.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 784.962 | TFLOPs: 27.48 | +7: iteration 4150/ 21553 | consumed samples: 1062400 | consumed tokens: 2175795200 | elapsed time per iteration (s): 0.30 | learning rate: 1.853E-04 | global batch size: 256 | lm loss: 2.861586E+00 | grad norm: 0.372 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.638 | TFLOPs: 29.67 | +7: iteration 4160/ 21553 | consumed samples: 1064960 | consumed tokens: 2181038080 | elapsed time per iteration (s): 0.30 | learning rate: 1.852E-04 | global batch size: 256 | lm loss: 2.859305E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.794 | TFLOPs: 29.43 | +7: iteration 4170/ 21553 | consumed samples: 1067520 | consumed tokens: 2186280960 | elapsed time per iteration (s): 0.31 | learning rate: 1.852E-04 | global batch size: 256 | lm loss: 2.834217E+00 | grad norm: 0.348 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.540 | TFLOPs: 29.35 | +7: iteration 4180/ 21553 | consumed samples: 1070080 | consumed tokens: 2191523840 | elapsed time per iteration (s): 0.31 | learning rate: 1.851E-04 | global batch size: 256 | lm loss: 2.852509E+00 | grad norm: 0.348 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.858 | TFLOPs: 29.30 | +7: iteration 4190/ 21553 | consumed samples: 1072640 | consumed tokens: 2196766720 | elapsed time per iteration (s): 0.31 | learning rate: 1.850E-04 | global batch size: 256 | lm loss: 2.817438E+00 | grad norm: 0.356 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.868 | TFLOPs: 28.98 | +7: iteration 4200/ 21553 | consumed samples: 1075200 | consumed tokens: 2202009600 | elapsed time per iteration (s): 0.31 | learning rate: 1.850E-04 | global batch size: 256 | lm loss: 2.852826E+00 | grad norm: 0.365 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.598 | TFLOPs: 29.22 | +7: iteration 4210/ 21553 | consumed samples: 1077760 | consumed tokens: 2207252480 | elapsed time per iteration (s): 0.31 | learning rate: 1.849E-04 | global batch size: 256 | lm loss: 2.851560E+00 | grad norm: 0.374 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.499 | TFLOPs: 29.21 | +7: iteration 4220/ 21553 | consumed samples: 1080320 | consumed tokens: 2212495360 | elapsed time per iteration (s): 0.30 | learning rate: 1.848E-04 | global batch size: 256 | lm loss: 2.869952E+00 | grad norm: 0.353 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.361 | TFLOPs: 29.49 | +7: iteration 4230/ 21553 | consumed samples: 1082880 | consumed tokens: 2217738240 | elapsed time per iteration (s): 0.31 | learning rate: 1.847E-04 | global batch size: 256 | lm loss: 2.851041E+00 | grad norm: 0.382 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.683 | TFLOPs: 29.32 | +7: iteration 4240/ 21553 | consumed samples: 1085440 | consumed tokens: 2222981120 | elapsed time per iteration (s): 0.30 | learning rate: 1.847E-04 | global batch size: 256 | lm loss: 2.848520E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.360 | TFLOPs: 30.01 | +7: iteration 4250/ 21553 | consumed samples: 1088000 | consumed tokens: 2228224000 | elapsed time per iteration (s): 0.31 | learning rate: 1.846E-04 | global batch size: 256 | lm loss: 2.828146E+00 | grad norm: 0.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.590 | TFLOPs: 29.08 | +7: iteration 4260/ 21553 | consumed samples: 1090560 | consumed tokens: 2233466880 | elapsed time per iteration (s): 0.30 | learning rate: 1.845E-04 | global batch size: 256 | lm loss: 2.855270E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.958 | TFLOPs: 29.68 | +7: iteration 4270/ 21553 | consumed samples: 1093120 | consumed tokens: 2238709760 | elapsed time per iteration (s): 0.31 | learning rate: 1.844E-04 | global batch size: 256 | lm loss: 2.812370E+00 | grad norm: 0.355 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.378 | TFLOPs: 29.03 | +7: iteration 4280/ 21553 | consumed samples: 1095680 | consumed tokens: 2243952640 | elapsed time per iteration (s): 0.30 | learning rate: 1.844E-04 | global batch size: 256 | lm loss: 2.850959E+00 | grad norm: 0.384 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.269 | TFLOPs: 29.45 | +7: iteration 4290/ 21553 | consumed samples: 1098240 | consumed tokens: 2249195520 | elapsed time per iteration (s): 0.31 | learning rate: 1.843E-04 | global batch size: 256 | lm loss: 2.836061E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.646 | TFLOPs: 29.25 | +7: iteration 4300/ 21553 | consumed samples: 1100800 | consumed tokens: 2254438400 | elapsed time per iteration (s): 0.31 | learning rate: 1.842E-04 | global batch size: 256 | lm loss: 2.851937E+00 | grad norm: 0.388 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.386 | TFLOPs: 29.14 | +7: iteration 4310/ 21553 | consumed samples: 1103360 | consumed tokens: 2259681280 | elapsed time per iteration (s): 0.31 | learning rate: 1.841E-04 | global batch size: 256 | lm loss: 2.839665E+00 | grad norm: 0.343 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.653 | TFLOPs: 29.08 | +7: iteration 4320/ 21553 | consumed samples: 1105920 | consumed tokens: 2264924160 | elapsed time per iteration (s): 0.32 | learning rate: 1.841E-04 | global batch size: 256 | lm loss: 2.836734E+00 | grad norm: 0.347 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.804 | TFLOPs: 28.42 | +7: iteration 4330/ 21553 | consumed samples: 1108480 | consumed tokens: 2270167040 | elapsed time per iteration (s): 0.30 | learning rate: 1.840E-04 | global batch size: 256 | lm loss: 2.810205E+00 | grad norm: 0.362 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.195 | TFLOPs: 29.45 | +7: iteration 4340/ 21553 | consumed samples: 1111040 | consumed tokens: 2275409920 | elapsed time per iteration (s): 0.30 | learning rate: 1.839E-04 | global batch size: 256 | lm loss: 2.872846E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.906 | TFLOPs: 29.58 | +7: iteration 4350/ 21553 | consumed samples: 1113600 | consumed tokens: 2280652800 | elapsed time per iteration (s): 0.31 | learning rate: 1.838E-04 | global batch size: 256 | lm loss: 2.845146E+00 | grad norm: 0.340 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.798 | TFLOPs: 29.12 | +7: iteration 4360/ 21553 | consumed samples: 1116160 | consumed tokens: 2285895680 | elapsed time per iteration (s): 0.31 | learning rate: 1.838E-04 | global batch size: 256 | lm loss: 2.836543E+00 | grad norm: 0.379 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.868 | TFLOPs: 28.77 | +7: iteration 4370/ 21553 | consumed samples: 1118720 | consumed tokens: 2291138560 | elapsed time per iteration (s): 0.31 | learning rate: 1.837E-04 | global batch size: 256 | lm loss: 2.871127E+00 | grad norm: 0.371 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.464 | TFLOPs: 29.07 | +7: iteration 4380/ 21553 | consumed samples: 1121280 | consumed tokens: 2296381440 | elapsed time per iteration (s): 0.31 | learning rate: 1.836E-04 | global batch size: 256 | lm loss: 2.832903E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.905 | TFLOPs: 29.37 | +7: iteration 4390/ 21553 | consumed samples: 1123840 | consumed tokens: 2301624320 | elapsed time per iteration (s): 0.31 | learning rate: 1.835E-04 | global batch size: 256 | lm loss: 2.867123E+00 | grad norm: 0.370 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.628 | TFLOPs: 29.08 | +7: iteration 4400/ 21553 | consumed samples: 1126400 | consumed tokens: 2306867200 | elapsed time per iteration (s): 0.31 | learning rate: 1.835E-04 | global batch size: 256 | lm loss: 2.845667E+00 | grad norm: 0.366 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.486 | TFLOPs: 29.21 | +7: iteration 4410/ 21553 | consumed samples: 1128960 | consumed tokens: 2312110080 | elapsed time per iteration (s): 0.31 | learning rate: 1.834E-04 | global batch size: 256 | lm loss: 2.819373E+00 | grad norm: 0.348 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.845 | TFLOPs: 29.37 | +7: iteration 4420/ 21553 | consumed samples: 1131520 | consumed tokens: 2317352960 | elapsed time per iteration (s): 0.31 | learning rate: 1.833E-04 | global batch size: 256 | lm loss: 2.832657E+00 | grad norm: 0.365 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.888 | TFLOPs: 28.56 | +7: iteration 4430/ 21553 | consumed samples: 1134080 | consumed tokens: 2322595840 | elapsed time per iteration (s): 0.31 | learning rate: 1.832E-04 | global batch size: 256 | lm loss: 2.807343E+00 | grad norm: 0.366 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.641 | TFLOPs: 29.32 | +7: iteration 4440/ 21553 | consumed samples: 1136640 | consumed tokens: 2327838720 | elapsed time per iteration (s): 0.30 | learning rate: 1.831E-04 | global batch size: 256 | lm loss: 2.843810E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.845 | TFLOPs: 29.96 | +7: iteration 4450/ 21553 | consumed samples: 1139200 | consumed tokens: 2333081600 | elapsed time per iteration (s): 0.31 | learning rate: 1.831E-04 | global batch size: 256 | lm loss: 2.847478E+00 | grad norm: 0.356 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.767 | TFLOPs: 29.33 | +7: iteration 4460/ 21553 | consumed samples: 1141760 | consumed tokens: 2338324480 | elapsed time per iteration (s): 0.30 | learning rate: 1.830E-04 | global batch size: 256 | lm loss: 2.779046E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.335 | TFLOPs: 29.91 | +7: iteration 4470/ 21553 | consumed samples: 1144320 | consumed tokens: 2343567360 | elapsed time per iteration (s): 0.31 | learning rate: 1.829E-04 | global batch size: 256 | lm loss: 2.822653E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.528 | TFLOPs: 29.07 | +7: iteration 4480/ 21553 | consumed samples: 1146880 | consumed tokens: 2348810240 | elapsed time per iteration (s): 0.31 | learning rate: 1.828E-04 | global batch size: 256 | lm loss: 2.841912E+00 | grad norm: 0.368 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.602 | TFLOPs: 29.36 | +7: iteration 4490/ 21553 | consumed samples: 1149440 | consumed tokens: 2354053120 | elapsed time per iteration (s): 0.30 | learning rate: 1.828E-04 | global batch size: 256 | lm loss: 2.796008E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.604 | TFLOPs: 29.50 | +7: iteration 4500/ 21553 | consumed samples: 1152000 | consumed tokens: 2359296000 | elapsed time per iteration (s): 0.31 | learning rate: 1.827E-04 | global batch size: 256 | lm loss: 2.829115E+00 | grad norm: 0.382 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.719 | TFLOPs: 29.19 | +7: iteration 4510/ 21553 | consumed samples: 1154560 | consumed tokens: 2364538880 | elapsed time per iteration (s): 0.31 | learning rate: 1.826E-04 | global batch size: 256 | lm loss: 2.807838E+00 | grad norm: 0.347 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.395 | TFLOPs: 28.82 | +7: iteration 4520/ 21553 | consumed samples: 1157120 | consumed tokens: 2369781760 | elapsed time per iteration (s): 0.30 | learning rate: 1.825E-04 | global batch size: 256 | lm loss: 2.833880E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.931 | TFLOPs: 29.40 | +7: iteration 4530/ 21553 | consumed samples: 1159680 | consumed tokens: 2375024640 | elapsed time per iteration (s): 0.31 | learning rate: 1.824E-04 | global batch size: 256 | lm loss: 2.856962E+00 | grad norm: 0.369 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.546 | TFLOPs: 28.87 | +7: iteration 4540/ 21553 | consumed samples: 1162240 | consumed tokens: 2380267520 | elapsed time per iteration (s): 0.31 | learning rate: 1.824E-04 | global batch size: 256 | lm loss: 2.837911E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.823 | TFLOPs: 29.33 | +7: iteration 4550/ 21553 | consumed samples: 1164800 | consumed tokens: 2385510400 | elapsed time per iteration (s): 0.31 | learning rate: 1.823E-04 | global batch size: 256 | lm loss: 2.809662E+00 | grad norm: 0.367 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.725 | TFLOPs: 28.49 | +7: iteration 4560/ 21553 | consumed samples: 1167360 | consumed tokens: 2390753280 | elapsed time per iteration (s): 0.31 | learning rate: 1.822E-04 | global batch size: 256 | lm loss: 2.827585E+00 | grad norm: 0.348 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.631 | TFLOPs: 28.87 | +7: iteration 4570/ 21553 | consumed samples: 1169920 | consumed tokens: 2395996160 | elapsed time per iteration (s): 0.31 | learning rate: 1.821E-04 | global batch size: 256 | lm loss: 2.811715E+00 | grad norm: 0.368 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.338 | TFLOPs: 28.58 | +7: iteration 4580/ 21553 | consumed samples: 1172480 | consumed tokens: 2401239040 | elapsed time per iteration (s): 0.33 | learning rate: 1.820E-04 | global batch size: 256 | lm loss: 2.833770E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 779.965 | TFLOPs: 27.30 | +7: iteration 4590/ 21553 | consumed samples: 1175040 | consumed tokens: 2406481920 | elapsed time per iteration (s): 0.30 | learning rate: 1.820E-04 | global batch size: 256 | lm loss: 2.782914E+00 | grad norm: 0.361 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.690 | TFLOPs: 29.57 | +7: iteration 4600/ 21553 | consumed samples: 1177600 | consumed tokens: 2411724800 | elapsed time per iteration (s): 0.31 | learning rate: 1.819E-04 | global batch size: 256 | lm loss: 2.850486E+00 | grad norm: 0.350 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.252 | TFLOPs: 29.13 | +7: iteration 4610/ 21553 | consumed samples: 1180160 | consumed tokens: 2416967680 | elapsed time per iteration (s): 0.30 | learning rate: 1.818E-04 | global batch size: 256 | lm loss: 2.820421E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.338 | TFLOPs: 29.66 | +7: iteration 4620/ 21553 | consumed samples: 1182720 | consumed tokens: 2422210560 | elapsed time per iteration (s): 0.31 | learning rate: 1.817E-04 | global batch size: 256 | lm loss: 2.826736E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.248 | TFLOPs: 29.10 | +7: iteration 4630/ 21553 | consumed samples: 1185280 | consumed tokens: 2427453440 | elapsed time per iteration (s): 0.32 | learning rate: 1.816E-04 | global batch size: 256 | lm loss: 2.842763E+00 | grad norm: 0.372 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 803.228 | TFLOPs: 28.12 | +7: iteration 4640/ 21553 | consumed samples: 1187840 | consumed tokens: 2432696320 | elapsed time per iteration (s): 0.31 | learning rate: 1.816E-04 | global batch size: 256 | lm loss: 2.855034E+00 | grad norm: 0.359 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.337 | TFLOPs: 29.28 | +7: iteration 4650/ 21553 | consumed samples: 1190400 | consumed tokens: 2437939200 | elapsed time per iteration (s): 0.31 | learning rate: 1.815E-04 | global batch size: 256 | lm loss: 2.811374E+00 | grad norm: 0.360 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.516 | TFLOPs: 28.76 | +7: iteration 4660/ 21553 | consumed samples: 1192960 | consumed tokens: 2443182080 | elapsed time per iteration (s): 0.31 | learning rate: 1.814E-04 | global batch size: 256 | lm loss: 2.810006E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.112 | TFLOPs: 29.20 | +7: iteration 4670/ 21553 | consumed samples: 1195520 | consumed tokens: 2448424960 | elapsed time per iteration (s): 0.31 | learning rate: 1.813E-04 | global batch size: 256 | lm loss: 2.789421E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.481 | TFLOPs: 29.00 | +7: iteration 4680/ 21553 | consumed samples: 1198080 | consumed tokens: 2453667840 | elapsed time per iteration (s): 0.30 | learning rate: 1.812E-04 | global batch size: 256 | lm loss: 2.808827E+00 | grad norm: 0.367 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.628 | TFLOPs: 29.81 | +7: iteration 4690/ 21553 | consumed samples: 1200640 | consumed tokens: 2458910720 | elapsed time per iteration (s): 0.31 | learning rate: 1.812E-04 | global batch size: 256 | lm loss: 2.834137E+00 | grad norm: 0.359 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.862 | TFLOPs: 29.02 | +7: iteration 4700/ 21553 | consumed samples: 1203200 | consumed tokens: 2464153600 | elapsed time per iteration (s): 0.31 | learning rate: 1.811E-04 | global batch size: 256 | lm loss: 2.816044E+00 | grad norm: 0.345 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.050 | TFLOPs: 29.02 | +7: iteration 4710/ 21553 | consumed samples: 1205760 | consumed tokens: 2469396480 | elapsed time per iteration (s): 0.31 | learning rate: 1.810E-04 | global batch size: 256 | lm loss: 2.792964E+00 | grad norm: 0.364 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.404 | TFLOPs: 29.14 | +7: iteration 4720/ 21553 | consumed samples: 1208320 | consumed tokens: 2474639360 | elapsed time per iteration (s): 0.31 | learning rate: 1.809E-04 | global batch size: 256 | lm loss: 2.812360E+00 | grad norm: 0.371 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.576 | TFLOPs: 29.29 | +7: iteration 4730/ 21553 | consumed samples: 1210880 | consumed tokens: 2479882240 | elapsed time per iteration (s): 0.31 | learning rate: 1.808E-04 | global batch size: 256 | lm loss: 2.786519E+00 | grad norm: 0.345 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.189 | TFLOPs: 29.06 | +7: iteration 4740/ 21553 | consumed samples: 1213440 | consumed tokens: 2485125120 | elapsed time per iteration (s): 0.31 | learning rate: 1.808E-04 | global batch size: 256 | lm loss: 2.810740E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.782 | TFLOPs: 29.01 | +7: iteration 4750/ 21553 | consumed samples: 1216000 | consumed tokens: 2490368000 | elapsed time per iteration (s): 0.30 | learning rate: 1.807E-04 | global batch size: 256 | lm loss: 2.803187E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.858 | TFLOPs: 29.61 | +7: iteration 4760/ 21553 | consumed samples: 1218560 | consumed tokens: 2495610880 | elapsed time per iteration (s): 0.30 | learning rate: 1.806E-04 | global batch size: 256 | lm loss: 2.811773E+00 | grad norm: 0.362 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.963 | TFLOPs: 29.54 | +7: iteration 4770/ 21553 | consumed samples: 1221120 | consumed tokens: 2500853760 | elapsed time per iteration (s): 0.31 | learning rate: 1.805E-04 | global batch size: 256 | lm loss: 2.839690E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.216 | TFLOPs: 28.92 | +7: iteration 4780/ 21553 | consumed samples: 1223680 | consumed tokens: 2506096640 | elapsed time per iteration (s): 0.31 | learning rate: 1.804E-04 | global batch size: 256 | lm loss: 2.793161E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.346 | TFLOPs: 28.86 | +7: iteration 4790/ 21553 | consumed samples: 1226240 | consumed tokens: 2511339520 | elapsed time per iteration (s): 0.31 | learning rate: 1.803E-04 | global batch size: 256 | lm loss: 2.817276E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.746 | TFLOPs: 29.33 | +7: iteration 4800/ 21553 | consumed samples: 1228800 | consumed tokens: 2516582400 | elapsed time per iteration (s): 0.30 | learning rate: 1.803E-04 | global batch size: 256 | lm loss: 2.832806E+00 | grad norm: 0.375 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.830 | TFLOPs: 29.54 | +7: iteration 4810/ 21553 | consumed samples: 1231360 | consumed tokens: 2521825280 | elapsed time per iteration (s): 0.31 | learning rate: 1.802E-04 | global batch size: 256 | lm loss: 2.818481E+00 | grad norm: 0.352 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.654 | TFLOPs: 28.90 | +7: iteration 4820/ 21553 | consumed samples: 1233920 | consumed tokens: 2527068160 | elapsed time per iteration (s): 0.30 | learning rate: 1.801E-04 | global batch size: 256 | lm loss: 2.800968E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.286 | TFLOPs: 30.01 | +7: iteration 4830/ 21553 | consumed samples: 1236480 | consumed tokens: 2532311040 | elapsed time per iteration (s): 0.31 | learning rate: 1.800E-04 | global batch size: 256 | lm loss: 2.789679E+00 | grad norm: 0.355 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.707 | TFLOPs: 29.22 | +7: iteration 4840/ 21553 | consumed samples: 1239040 | consumed tokens: 2537553920 | elapsed time per iteration (s): 0.30 | learning rate: 1.799E-04 | global batch size: 256 | lm loss: 2.766528E+00 | grad norm: 0.359 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.084 | TFLOPs: 29.86 | +7: iteration 4850/ 21553 | consumed samples: 1241600 | consumed tokens: 2542796800 | elapsed time per iteration (s): 0.31 | learning rate: 1.798E-04 | global batch size: 256 | lm loss: 2.799014E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.090 | TFLOPs: 29.34 | +7: iteration 4860/ 21553 | consumed samples: 1244160 | consumed tokens: 2548039680 | elapsed time per iteration (s): 0.30 | learning rate: 1.798E-04 | global batch size: 256 | lm loss: 2.833667E+00 | grad norm: 0.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.748 | TFLOPs: 29.68 | +7: iteration 4870/ 21553 | consumed samples: 1246720 | consumed tokens: 2553282560 | elapsed time per iteration (s): 0.31 | learning rate: 1.797E-04 | global batch size: 256 | lm loss: 2.791080E+00 | grad norm: 0.351 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.127 | TFLOPs: 28.96 | +7: iteration 4880/ 21553 | consumed samples: 1249280 | consumed tokens: 2558525440 | elapsed time per iteration (s): 0.30 | learning rate: 1.796E-04 | global batch size: 256 | lm loss: 2.836643E+00 | grad norm: 0.365 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.478 | TFLOPs: 29.56 | +7: iteration 4890/ 21553 | consumed samples: 1251840 | consumed tokens: 2563768320 | elapsed time per iteration (s): 0.31 | learning rate: 1.795E-04 | global batch size: 256 | lm loss: 2.811197E+00 | grad norm: 0.351 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.475 | TFLOPs: 28.83 | +7: iteration 4900/ 21553 | consumed samples: 1254400 | consumed tokens: 2569011200 | elapsed time per iteration (s): 0.30 | learning rate: 1.794E-04 | global batch size: 256 | lm loss: 2.782685E+00 | grad norm: 0.385 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.822 | TFLOPs: 29.82 | +7: iteration 4910/ 21553 | consumed samples: 1256960 | consumed tokens: 2574254080 | elapsed time per iteration (s): 0.31 | learning rate: 1.793E-04 | global batch size: 256 | lm loss: 2.769319E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.551 | TFLOPs: 29.18 | +7: iteration 4920/ 21553 | consumed samples: 1259520 | consumed tokens: 2579496960 | elapsed time per iteration (s): 0.30 | learning rate: 1.793E-04 | global batch size: 256 | lm loss: 2.820488E+00 | grad norm: 0.350 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.070 | TFLOPs: 29.48 | +7: iteration 4930/ 21553 | consumed samples: 1262080 | consumed tokens: 2584739840 | elapsed time per iteration (s): 0.31 | learning rate: 1.792E-04 | global batch size: 256 | lm loss: 2.794538E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.914 | TFLOPs: 28.84 | +7: iteration 4940/ 21553 | consumed samples: 1264640 | consumed tokens: 2589982720 | elapsed time per iteration (s): 0.30 | learning rate: 1.791E-04 | global batch size: 256 | lm loss: 2.805577E+00 | grad norm: 0.363 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.953 | TFLOPs: 29.79 | +7: iteration 4950/ 21553 | consumed samples: 1267200 | consumed tokens: 2595225600 | elapsed time per iteration (s): 0.31 | learning rate: 1.790E-04 | global batch size: 256 | lm loss: 2.831097E+00 | grad norm: 0.339 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.090 | TFLOPs: 28.67 | +7: iteration 4960/ 21553 | consumed samples: 1269760 | consumed tokens: 2600468480 | elapsed time per iteration (s): 0.31 | learning rate: 1.789E-04 | global batch size: 256 | lm loss: 2.798101E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.939 | TFLOPs: 28.84 | +7: iteration 4970/ 21553 | consumed samples: 1272320 | consumed tokens: 2605711360 | elapsed time per iteration (s): 0.30 | learning rate: 1.788E-04 | global batch size: 256 | lm loss: 2.791006E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.665 | TFLOPs: 29.43 | +7: iteration 4980/ 21553 | consumed samples: 1274880 | consumed tokens: 2610954240 | elapsed time per iteration (s): 0.30 | learning rate: 1.787E-04 | global batch size: 256 | lm loss: 2.806308E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.020 | TFLOPs: 29.58 | +7: iteration 4990/ 21553 | consumed samples: 1277440 | consumed tokens: 2616197120 | elapsed time per iteration (s): 0.30 | learning rate: 1.787E-04 | global batch size: 256 | lm loss: 2.783210E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.198 | TFLOPs: 29.41 | +7: iteration 5000/ 21553 | consumed samples: 1280000 | consumed tokens: 2621440000 | elapsed time per iteration (s): 0.30 | learning rate: 1.786E-04 | global batch size: 256 | lm loss: 2.779180E+00 | grad norm: 0.347 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.386 | TFLOPs: 30.08 | +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 5000 | lm loss value: 2.795156E+00 | lm loss PPL: 1.636518E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 5000 to checkpoints_146m +0: [2022-11-25 17:58:19,428] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step5000 is begin to save! +0: [2022-11-25 17:58:19,433] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_01-model_00-model_states.pt... +0: [2022-11-25 17:58:19,515] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_01-model_00-model_states.pt. +0: [2022-11-25 17:58:19,515] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_03-model_00-model_states.pt... +0: [2022-11-25 17:58:19,531] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_03-model_00-model_states.pt. +0: [2022-11-25 17:58:19,531] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_04-model_00-model_states.pt... +0: [2022-11-25 17:58:19,546] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_04-model_00-model_states.pt. +0: [2022-11-25 17:58:19,546] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_05-model_00-model_states.pt... +0: [2022-11-25 17:58:19,560] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_05-model_00-model_states.pt. +0: [2022-11-25 17:58:19,561] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_06-model_00-model_states.pt... +0: [2022-11-25 17:58:19,575] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_06-model_00-model_states.pt. +0: [2022-11-25 17:58:19,576] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_07-model_00-model_states.pt... +0: [2022-11-25 17:58:19,590] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_07-model_00-model_states.pt. +0: [2022-11-25 17:58:19,591] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_08-model_00-model_states.pt... +0: [2022-11-25 17:58:19,606] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_08-model_00-model_states.pt. +0: [2022-11-25 17:58:19,606] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_09-model_00-model_states.pt... +0: [2022-11-25 17:58:19,621] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_09-model_00-model_states.pt. +0: [2022-11-25 17:58:19,621] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_10-model_00-model_states.pt... +0: [2022-11-25 17:58:19,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_10-model_00-model_states.pt. +0: [2022-11-25 17:58:19,636] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_11-model_00-model_states.pt... +0: [2022-11-25 17:58:19,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_11-model_00-model_states.pt. +0: [2022-11-25 17:58:19,651] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_12-model_00-model_states.pt... +0: [2022-11-25 17:58:19,666] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_12-model_00-model_states.pt. +0: [2022-11-25 17:58:19,666] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_13-model_00-model_states.pt... +0: [2022-11-25 17:58:19,681] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_13-model_00-model_states.pt. +0: [2022-11-25 17:58:19,681] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_14-model_00-model_states.pt... +0: [2022-11-25 17:58:19,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_14-model_00-model_states.pt. +0: [2022-11-25 17:58:19,696] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_15-model_00-model_states.pt... +0: [2022-11-25 17:58:19,711] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_15-model_00-model_states.pt. +0: [2022-11-25 17:58:19,711] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_16-model_00-model_states.pt... +0: [2022-11-25 17:58:19,725] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_16-model_00-model_states.pt. +0: [2022-11-25 17:58:19,726] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_17-model_00-model_states.pt... +0: [2022-11-25 17:58:19,740] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_17-model_00-model_states.pt. +0: [2022-11-25 17:58:19,740] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/layer_19-model_00-model_states.pt... +0: [2022-11-25 17:58:19,741] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/layer_19-model_00-model_states.pt. +0: [2022-11-25 17:58:19,742] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step5000/mp_rank_00_model_states.pt +0: [2022-11-25 17:58:19,742] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/mp_rank_00_model_states.pt... +0: [2022-11-25 17:58:19,744] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/mp_rank_00_model_states.pt. +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +7: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +5: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +3: [2022-11-25 17:58:19,762] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +0: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,794] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,794] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,794] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,794] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2022-11-25 17:58:19,806] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 17:58:19,806] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 17:58:19,806] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 17:58:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2022-11-25 17:58:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 17:58:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 17:58:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2022-11-25 17:58:19,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 17:58:19,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 17:58:19,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 17:58:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2022-11-25 17:58:19,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,822] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,822] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2022-11-25 17:58:19,823] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 17:58:19,823] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2022-11-25 17:58:19,831] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 17:58:19,831] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 17:58:19,831] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,837] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,837] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2022-11-25 17:58:19,842] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 17:58:19,842] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 17:58:19,843] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: successfully saved checkpoint at iteration 5000 to checkpoints_146m +7: time (ms) | save-checkpoint: 419.70 +7: iteration 5010/ 21553 | consumed samples: 1282560 | consumed tokens: 2626682880 | elapsed time per iteration (s): 0.35 | learning rate: 1.785E-04 | global batch size: 256 | lm loss: 2.789563E+00 | grad norm: 0.368 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 725.988 | TFLOPs: 25.41 | +7: iteration 5020/ 21553 | consumed samples: 1285120 | consumed tokens: 2631925760 | elapsed time per iteration (s): 0.30 | learning rate: 1.784E-04 | global batch size: 256 | lm loss: 2.768656E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.378 | TFLOPs: 29.52 | +7: iteration 5030/ 21553 | consumed samples: 1287680 | consumed tokens: 2637168640 | elapsed time per iteration (s): 0.31 | learning rate: 1.783E-04 | global batch size: 256 | lm loss: 2.783338E+00 | grad norm: 0.362 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.496 | TFLOPs: 28.76 | +7: iteration 5040/ 21553 | consumed samples: 1290240 | consumed tokens: 2642411520 | elapsed time per iteration (s): 0.30 | learning rate: 1.782E-04 | global batch size: 256 | lm loss: 2.762745E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.517 | TFLOPs: 29.46 | +7: iteration 5050/ 21553 | consumed samples: 1292800 | consumed tokens: 2647654400 | elapsed time per iteration (s): 0.31 | learning rate: 1.781E-04 | global batch size: 256 | lm loss: 2.756079E+00 | grad norm: 0.353 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.318 | TFLOPs: 28.82 | +7: iteration 5060/ 21553 | consumed samples: 1295360 | consumed tokens: 2652897280 | elapsed time per iteration (s): 0.31 | learning rate: 1.781E-04 | global batch size: 256 | lm loss: 2.764389E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.559 | TFLOPs: 28.97 | +7: iteration 5070/ 21553 | consumed samples: 1297920 | consumed tokens: 2658140160 | elapsed time per iteration (s): 0.31 | learning rate: 1.780E-04 | global batch size: 256 | lm loss: 2.786765E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.763 | TFLOPs: 28.77 | +7: iteration 5080/ 21553 | consumed samples: 1300480 | consumed tokens: 2663383040 | elapsed time per iteration (s): 0.30 | learning rate: 1.779E-04 | global batch size: 256 | lm loss: 2.810010E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.435 | TFLOPs: 29.39 | +7: iteration 5090/ 21553 | consumed samples: 1303040 | consumed tokens: 2668625920 | elapsed time per iteration (s): 0.30 | learning rate: 1.778E-04 | global batch size: 256 | lm loss: 2.785743E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.378 | TFLOPs: 29.38 | +7: iteration 5100/ 21553 | consumed samples: 1305600 | consumed tokens: 2673868800 | elapsed time per iteration (s): 0.30 | learning rate: 1.777E-04 | global batch size: 256 | lm loss: 2.816172E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.924 | TFLOPs: 30.00 | +7: iteration 5110/ 21553 | consumed samples: 1308160 | consumed tokens: 2679111680 | elapsed time per iteration (s): 0.30 | learning rate: 1.776E-04 | global batch size: 256 | lm loss: 2.795464E+00 | grad norm: 0.361 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.620 | TFLOPs: 29.67 | +7: iteration 5120/ 21553 | consumed samples: 1310720 | consumed tokens: 2684354560 | elapsed time per iteration (s): 0.32 | learning rate: 1.775E-04 | global batch size: 256 | lm loss: 2.799277E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.448 | TFLOPs: 28.27 | +7: iteration 5130/ 21553 | consumed samples: 1313280 | consumed tokens: 2689597440 | elapsed time per iteration (s): 0.31 | learning rate: 1.775E-04 | global batch size: 256 | lm loss: 2.798460E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.111 | TFLOPs: 28.99 | +7: iteration 5140/ 21553 | consumed samples: 1315840 | consumed tokens: 2694840320 | elapsed time per iteration (s): 0.31 | learning rate: 1.774E-04 | global batch size: 256 | lm loss: 2.784162E+00 | grad norm: 0.351 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.502 | TFLOPs: 29.11 | +7: iteration 5150/ 21553 | consumed samples: 1318400 | consumed tokens: 2700083200 | elapsed time per iteration (s): 0.30 | learning rate: 1.773E-04 | global batch size: 256 | lm loss: 2.798956E+00 | grad norm: 0.371 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.480 | TFLOPs: 29.42 | +7: iteration 5160/ 21553 | consumed samples: 1320960 | consumed tokens: 2705326080 | elapsed time per iteration (s): 0.30 | learning rate: 1.772E-04 | global batch size: 256 | lm loss: 2.799203E+00 | grad norm: 0.388 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.034 | TFLOPs: 29.72 | +7: iteration 5170/ 21553 | consumed samples: 1323520 | consumed tokens: 2710568960 | elapsed time per iteration (s): 0.31 | learning rate: 1.771E-04 | global batch size: 256 | lm loss: 2.783388E+00 | grad norm: 0.342 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.266 | TFLOPs: 29.35 | +7: iteration 5180/ 21553 | consumed samples: 1326080 | consumed tokens: 2715811840 | elapsed time per iteration (s): 0.30 | learning rate: 1.770E-04 | global batch size: 256 | lm loss: 2.788422E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.068 | TFLOPs: 29.72 | +7: iteration 5190/ 21553 | consumed samples: 1328640 | consumed tokens: 2721054720 | elapsed time per iteration (s): 0.30 | learning rate: 1.769E-04 | global batch size: 256 | lm loss: 2.766456E+00 | grad norm: 0.373 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.635 | TFLOPs: 29.57 | +7: iteration 5200/ 21553 | consumed samples: 1331200 | consumed tokens: 2726297600 | elapsed time per iteration (s): 0.31 | learning rate: 1.768E-04 | global batch size: 256 | lm loss: 2.779356E+00 | grad norm: 0.373 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.450 | TFLOPs: 29.07 | +7: iteration 5210/ 21553 | consumed samples: 1333760 | consumed tokens: 2731540480 | elapsed time per iteration (s): 0.30 | learning rate: 1.767E-04 | global batch size: 256 | lm loss: 2.758022E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.385 | TFLOPs: 29.94 | +7: iteration 5220/ 21553 | consumed samples: 1336320 | consumed tokens: 2736783360 | elapsed time per iteration (s): 0.31 | learning rate: 1.767E-04 | global batch size: 256 | lm loss: 2.768659E+00 | grad norm: 0.343 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.651 | TFLOPs: 29.29 | +7: iteration 5230/ 21553 | consumed samples: 1338880 | consumed tokens: 2742026240 | elapsed time per iteration (s): 0.31 | learning rate: 1.766E-04 | global batch size: 256 | lm loss: 2.809244E+00 | grad norm: 0.354 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.703 | TFLOPs: 29.19 | +7: iteration 5240/ 21553 | consumed samples: 1341440 | consumed tokens: 2747269120 | elapsed time per iteration (s): 0.31 | learning rate: 1.765E-04 | global batch size: 256 | lm loss: 2.763100E+00 | grad norm: 0.377 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.047 | TFLOPs: 28.99 | +7: iteration 5250/ 21553 | consumed samples: 1344000 | consumed tokens: 2752512000 | elapsed time per iteration (s): 0.31 | learning rate: 1.764E-04 | global batch size: 256 | lm loss: 2.788865E+00 | grad norm: 0.353 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.019 | TFLOPs: 29.02 | +7: iteration 5260/ 21553 | consumed samples: 1346560 | consumed tokens: 2757754880 | elapsed time per iteration (s): 0.31 | learning rate: 1.763E-04 | global batch size: 256 | lm loss: 2.803653E+00 | grad norm: 0.335 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.634 | TFLOPs: 28.73 | +7: iteration 5270/ 21553 | consumed samples: 1349120 | consumed tokens: 2762997760 | elapsed time per iteration (s): 0.31 | learning rate: 1.762E-04 | global batch size: 256 | lm loss: 2.740223E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.574 | TFLOPs: 28.80 | +7: iteration 5280/ 21553 | consumed samples: 1351680 | consumed tokens: 2768240640 | elapsed time per iteration (s): 0.30 | learning rate: 1.761E-04 | global batch size: 256 | lm loss: 2.794492E+00 | grad norm: 0.334 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.031 | TFLOPs: 29.79 | +7: iteration 5290/ 21553 | consumed samples: 1354240 | consumed tokens: 2773483520 | elapsed time per iteration (s): 0.30 | learning rate: 1.760E-04 | global batch size: 256 | lm loss: 2.757253E+00 | grad norm: 0.343 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.381 | TFLOPs: 29.73 | +7: iteration 5300/ 21553 | consumed samples: 1356800 | consumed tokens: 2778726400 | elapsed time per iteration (s): 0.30 | learning rate: 1.759E-04 | global batch size: 256 | lm loss: 2.761913E+00 | grad norm: 0.334 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.296 | TFLOPs: 29.52 | +7: iteration 5310/ 21553 | consumed samples: 1359360 | consumed tokens: 2783969280 | elapsed time per iteration (s): 0.30 | learning rate: 1.758E-04 | global batch size: 256 | lm loss: 2.801473E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.575 | TFLOPs: 29.85 | +7: iteration 5320/ 21553 | consumed samples: 1361920 | consumed tokens: 2789212160 | elapsed time per iteration (s): 0.31 | learning rate: 1.758E-04 | global batch size: 256 | lm loss: 2.772847E+00 | grad norm: 0.325 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.492 | TFLOPs: 28.90 | +7: iteration 5330/ 21553 | consumed samples: 1364480 | consumed tokens: 2794455040 | elapsed time per iteration (s): 0.31 | learning rate: 1.757E-04 | global batch size: 256 | lm loss: 2.777376E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.102 | TFLOPs: 29.34 | +7: iteration 5340/ 21553 | consumed samples: 1367040 | consumed tokens: 2799697920 | elapsed time per iteration (s): 0.30 | learning rate: 1.756E-04 | global batch size: 256 | lm loss: 2.802417E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.388 | TFLOPs: 29.73 | +7: iteration 5350/ 21553 | consumed samples: 1369600 | consumed tokens: 2804940800 | elapsed time per iteration (s): 0.31 | learning rate: 1.755E-04 | global batch size: 256 | lm loss: 2.748513E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.798 | TFLOPs: 29.33 | +7: iteration 5360/ 21553 | consumed samples: 1372160 | consumed tokens: 2810183680 | elapsed time per iteration (s): 0.30 | learning rate: 1.754E-04 | global batch size: 256 | lm loss: 2.803759E+00 | grad norm: 0.369 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.978 | TFLOPs: 29.55 | +7: iteration 5370/ 21553 | consumed samples: 1374720 | consumed tokens: 2815426560 | elapsed time per iteration (s): 0.30 | learning rate: 1.753E-04 | global batch size: 256 | lm loss: 2.788057E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.894 | TFLOPs: 29.68 | +7: iteration 5380/ 21553 | consumed samples: 1377280 | consumed tokens: 2820669440 | elapsed time per iteration (s): 0.30 | learning rate: 1.752E-04 | global batch size: 256 | lm loss: 2.746953E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.081 | TFLOPs: 29.58 | +7: iteration 5390/ 21553 | consumed samples: 1379840 | consumed tokens: 2825912320 | elapsed time per iteration (s): 0.30 | learning rate: 1.751E-04 | global batch size: 256 | lm loss: 2.782843E+00 | grad norm: 0.364 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.720 | TFLOPs: 29.75 | +7: iteration 5400/ 21553 | consumed samples: 1382400 | consumed tokens: 2831155200 | elapsed time per iteration (s): 0.30 | learning rate: 1.750E-04 | global batch size: 256 | lm loss: 2.743900E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.182 | TFLOPs: 29.52 | +7: iteration 5410/ 21553 | consumed samples: 1384960 | consumed tokens: 2836398080 | elapsed time per iteration (s): 0.30 | learning rate: 1.749E-04 | global batch size: 256 | lm loss: 2.779528E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.442 | TFLOPs: 29.49 | +7: iteration 5420/ 21553 | consumed samples: 1387520 | consumed tokens: 2841640960 | elapsed time per iteration (s): 0.31 | learning rate: 1.748E-04 | global batch size: 256 | lm loss: 2.767011E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.209 | TFLOPs: 28.96 | +7: iteration 5430/ 21553 | consumed samples: 1390080 | consumed tokens: 2846883840 | elapsed time per iteration (s): 0.31 | learning rate: 1.748E-04 | global batch size: 256 | lm loss: 2.788924E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.993 | TFLOPs: 29.16 | +7: iteration 5440/ 21553 | consumed samples: 1392640 | consumed tokens: 2852126720 | elapsed time per iteration (s): 0.30 | learning rate: 1.747E-04 | global batch size: 256 | lm loss: 2.772049E+00 | grad norm: 0.341 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.335 | TFLOPs: 29.59 | +7: iteration 5450/ 21553 | consumed samples: 1395200 | consumed tokens: 2857369600 | elapsed time per iteration (s): 0.30 | learning rate: 1.746E-04 | global batch size: 256 | lm loss: 2.777849E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.654 | TFLOPs: 29.60 | +7: iteration 5460/ 21553 | consumed samples: 1397760 | consumed tokens: 2862612480 | elapsed time per iteration (s): 0.30 | learning rate: 1.745E-04 | global batch size: 256 | lm loss: 2.797121E+00 | grad norm: 0.335 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.540 | TFLOPs: 29.49 | +7: iteration 5470/ 21553 | consumed samples: 1400320 | consumed tokens: 2867855360 | elapsed time per iteration (s): 0.30 | learning rate: 1.744E-04 | global batch size: 256 | lm loss: 2.772616E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.642 | TFLOPs: 29.43 | +7: iteration 5480/ 21553 | consumed samples: 1402880 | consumed tokens: 2873098240 | elapsed time per iteration (s): 0.32 | learning rate: 1.743E-04 | global batch size: 256 | lm loss: 2.770176E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 805.933 | TFLOPs: 28.21 | +7: iteration 5490/ 21553 | consumed samples: 1405440 | consumed tokens: 2878341120 | elapsed time per iteration (s): 0.31 | learning rate: 1.742E-04 | global batch size: 256 | lm loss: 2.742739E+00 | grad norm: 0.361 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.606 | TFLOPs: 28.83 | +7: iteration 5500/ 21553 | consumed samples: 1408000 | consumed tokens: 2883584000 | elapsed time per iteration (s): 0.30 | learning rate: 1.741E-04 | global batch size: 256 | lm loss: 2.805213E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.876 | TFLOPs: 29.68 | +7: iteration 5510/ 21553 | consumed samples: 1410560 | consumed tokens: 2888826880 | elapsed time per iteration (s): 0.31 | learning rate: 1.740E-04 | global batch size: 256 | lm loss: 2.770165E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.673 | TFLOPs: 28.83 | +7: iteration 5520/ 21553 | consumed samples: 1413120 | consumed tokens: 2894069760 | elapsed time per iteration (s): 0.31 | learning rate: 1.739E-04 | global batch size: 256 | lm loss: 2.800453E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.803 | TFLOPs: 28.98 | +7: iteration 5530/ 21553 | consumed samples: 1415680 | consumed tokens: 2899312640 | elapsed time per iteration (s): 0.31 | learning rate: 1.738E-04 | global batch size: 256 | lm loss: 2.767376E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.455 | TFLOPs: 28.65 | +7: iteration 5540/ 21553 | consumed samples: 1418240 | consumed tokens: 2904555520 | elapsed time per iteration (s): 0.31 | learning rate: 1.737E-04 | global batch size: 256 | lm loss: 2.809337E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.355 | TFLOPs: 28.61 | +7: iteration 5550/ 21553 | consumed samples: 1420800 | consumed tokens: 2909798400 | elapsed time per iteration (s): 0.30 | learning rate: 1.736E-04 | global batch size: 256 | lm loss: 2.775531E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.190 | TFLOPs: 29.45 | +7: iteration 5560/ 21553 | consumed samples: 1423360 | consumed tokens: 2915041280 | elapsed time per iteration (s): 0.31 | learning rate: 1.735E-04 | global batch size: 256 | lm loss: 2.764611E+00 | grad norm: 0.345 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.311 | TFLOPs: 29.17 | +7: iteration 5570/ 21553 | consumed samples: 1425920 | consumed tokens: 2920284160 | elapsed time per iteration (s): 0.30 | learning rate: 1.735E-04 | global batch size: 256 | lm loss: 2.792566E+00 | grad norm: 0.348 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.889 | TFLOPs: 29.65 | +7: iteration 5580/ 21553 | consumed samples: 1428480 | consumed tokens: 2925527040 | elapsed time per iteration (s): 0.31 | learning rate: 1.734E-04 | global batch size: 256 | lm loss: 2.751311E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.907 | TFLOPs: 28.84 | +7: iteration 5590/ 21553 | consumed samples: 1431040 | consumed tokens: 2930769920 | elapsed time per iteration (s): 0.30 | learning rate: 1.733E-04 | global batch size: 256 | lm loss: 2.762774E+00 | grad norm: 0.355 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.837 | TFLOPs: 29.58 | +7: iteration 5600/ 21553 | consumed samples: 1433600 | consumed tokens: 2936012800 | elapsed time per iteration (s): 0.30 | learning rate: 1.732E-04 | global batch size: 256 | lm loss: 2.748348E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.922 | TFLOPs: 29.51 | +7: iteration 5610/ 21553 | consumed samples: 1436160 | consumed tokens: 2941255680 | elapsed time per iteration (s): 0.31 | learning rate: 1.731E-04 | global batch size: 256 | lm loss: 2.803632E+00 | grad norm: 0.335 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.345 | TFLOPs: 28.82 | +7: iteration 5620/ 21553 | consumed samples: 1438720 | consumed tokens: 2946498560 | elapsed time per iteration (s): 0.31 | learning rate: 1.730E-04 | global batch size: 256 | lm loss: 2.751375E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.321 | TFLOPs: 28.96 | +7: iteration 5630/ 21553 | consumed samples: 1441280 | consumed tokens: 2951741440 | elapsed time per iteration (s): 0.31 | learning rate: 1.729E-04 | global batch size: 256 | lm loss: 2.803796E+00 | grad norm: 0.335 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.644 | TFLOPs: 29.08 | +7: iteration 5640/ 21553 | consumed samples: 1443840 | consumed tokens: 2956984320 | elapsed time per iteration (s): 0.31 | learning rate: 1.728E-04 | global batch size: 256 | lm loss: 2.720516E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.272 | TFLOPs: 29.17 | +7: iteration 5650/ 21553 | consumed samples: 1446400 | consumed tokens: 2962227200 | elapsed time per iteration (s): 0.31 | learning rate: 1.727E-04 | global batch size: 256 | lm loss: 2.771934E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.196 | TFLOPs: 29.31 | +7: iteration 5660/ 21553 | consumed samples: 1448960 | consumed tokens: 2967470080 | elapsed time per iteration (s): 0.31 | learning rate: 1.726E-04 | global batch size: 256 | lm loss: 2.752773E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.631 | TFLOPs: 29.01 | +7: iteration 5670/ 21553 | consumed samples: 1451520 | consumed tokens: 2972712960 | elapsed time per iteration (s): 0.30 | learning rate: 1.725E-04 | global batch size: 256 | lm loss: 2.793122E+00 | grad norm: 0.371 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.953 | TFLOPs: 29.72 | +7: iteration 5680/ 21553 | consumed samples: 1454080 | consumed tokens: 2977955840 | elapsed time per iteration (s): 0.31 | learning rate: 1.724E-04 | global batch size: 256 | lm loss: 2.775712E+00 | grad norm: 0.339 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.216 | TFLOPs: 29.24 | +7: iteration 5690/ 21553 | consumed samples: 1456640 | consumed tokens: 2983198720 | elapsed time per iteration (s): 0.30 | learning rate: 1.723E-04 | global batch size: 256 | lm loss: 2.815835E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.732 | TFLOPs: 29.50 | +7: iteration 5700/ 21553 | consumed samples: 1459200 | consumed tokens: 2988441600 | elapsed time per iteration (s): 0.30 | learning rate: 1.722E-04 | global batch size: 256 | lm loss: 2.790644E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.475 | TFLOPs: 29.49 | +7: iteration 5710/ 21553 | consumed samples: 1461760 | consumed tokens: 2993684480 | elapsed time per iteration (s): 0.30 | learning rate: 1.721E-04 | global batch size: 256 | lm loss: 2.759221E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.528 | TFLOPs: 29.49 | +7: iteration 5720/ 21553 | consumed samples: 1464320 | consumed tokens: 2998927360 | elapsed time per iteration (s): 0.31 | learning rate: 1.720E-04 | global batch size: 256 | lm loss: 2.741006E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.918 | TFLOPs: 28.84 | +7: iteration 5730/ 21553 | consumed samples: 1466880 | consumed tokens: 3004170240 | elapsed time per iteration (s): 0.31 | learning rate: 1.719E-04 | global batch size: 256 | lm loss: 2.788582E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.232 | TFLOPs: 29.24 | +7: iteration 5740/ 21553 | consumed samples: 1469440 | consumed tokens: 3009413120 | elapsed time per iteration (s): 0.31 | learning rate: 1.718E-04 | global batch size: 256 | lm loss: 2.759112E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.055 | TFLOPs: 29.13 | +7: iteration 5750/ 21553 | consumed samples: 1472000 | consumed tokens: 3014656000 | elapsed time per iteration (s): 0.30 | learning rate: 1.717E-04 | global batch size: 256 | lm loss: 2.719102E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.282 | TFLOPs: 29.63 | +7: iteration 5760/ 21553 | consumed samples: 1474560 | consumed tokens: 3019898880 | elapsed time per iteration (s): 0.31 | learning rate: 1.716E-04 | global batch size: 256 | lm loss: 2.755179E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.079 | TFLOPs: 29.27 | +7: iteration 5770/ 21553 | consumed samples: 1477120 | consumed tokens: 3025141760 | elapsed time per iteration (s): 0.31 | learning rate: 1.715E-04 | global batch size: 256 | lm loss: 2.748029E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.307 | TFLOPs: 29.07 | +7: iteration 5780/ 21553 | consumed samples: 1479680 | consumed tokens: 3030384640 | elapsed time per iteration (s): 0.31 | learning rate: 1.714E-04 | global batch size: 256 | lm loss: 2.748699E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.109 | TFLOPs: 29.27 | +7: iteration 5790/ 21553 | consumed samples: 1482240 | consumed tokens: 3035627520 | elapsed time per iteration (s): 0.30 | learning rate: 1.714E-04 | global batch size: 256 | lm loss: 2.762837E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.495 | TFLOPs: 29.67 | +7: iteration 5800/ 21553 | consumed samples: 1484800 | consumed tokens: 3040870400 | elapsed time per iteration (s): 0.30 | learning rate: 1.713E-04 | global batch size: 256 | lm loss: 2.782656E+00 | grad norm: 0.364 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.560 | TFLOPs: 29.53 | +7: iteration 5810/ 21553 | consumed samples: 1487360 | consumed tokens: 3046113280 | elapsed time per iteration (s): 0.31 | learning rate: 1.712E-04 | global batch size: 256 | lm loss: 2.748693E+00 | grad norm: 0.334 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.140 | TFLOPs: 28.61 | +7: iteration 5820/ 21553 | consumed samples: 1489920 | consumed tokens: 3051356160 | elapsed time per iteration (s): 0.31 | learning rate: 1.711E-04 | global batch size: 256 | lm loss: 2.780427E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.124 | TFLOPs: 28.75 | +7: iteration 5830/ 21553 | consumed samples: 1492480 | consumed tokens: 3056599040 | elapsed time per iteration (s): 0.31 | learning rate: 1.710E-04 | global batch size: 256 | lm loss: 2.747077E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.383 | TFLOPs: 29.28 | +7: iteration 5840/ 21553 | consumed samples: 1495040 | consumed tokens: 3061841920 | elapsed time per iteration (s): 0.30 | learning rate: 1.709E-04 | global batch size: 256 | lm loss: 2.763966E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.513 | TFLOPs: 29.77 | +7: iteration 5850/ 21553 | consumed samples: 1497600 | consumed tokens: 3067084800 | elapsed time per iteration (s): 0.31 | learning rate: 1.708E-04 | global batch size: 256 | lm loss: 2.770498E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.194 | TFLOPs: 28.85 | +7: iteration 5860/ 21553 | consumed samples: 1500160 | consumed tokens: 3072327680 | elapsed time per iteration (s): 0.31 | learning rate: 1.707E-04 | global batch size: 256 | lm loss: 2.760128E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.706 | TFLOPs: 29.33 | +7: iteration 5870/ 21553 | consumed samples: 1502720 | consumed tokens: 3077570560 | elapsed time per iteration (s): 0.31 | learning rate: 1.706E-04 | global batch size: 256 | lm loss: 2.763048E+00 | grad norm: 0.340 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.089 | TFLOPs: 28.95 | +7: iteration 5880/ 21553 | consumed samples: 1505280 | consumed tokens: 3082813440 | elapsed time per iteration (s): 0.31 | learning rate: 1.705E-04 | global batch size: 256 | lm loss: 2.760589E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.221 | TFLOPs: 29.24 | +7: iteration 5890/ 21553 | consumed samples: 1507840 | consumed tokens: 3088056320 | elapsed time per iteration (s): 0.31 | learning rate: 1.704E-04 | global batch size: 256 | lm loss: 2.718811E+00 | grad norm: 0.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.310 | TFLOPs: 29.10 | +7: iteration 5900/ 21553 | consumed samples: 1510400 | consumed tokens: 3093299200 | elapsed time per iteration (s): 0.31 | learning rate: 1.703E-04 | global batch size: 256 | lm loss: 2.731684E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.666 | TFLOPs: 29.01 | +7: iteration 5910/ 21553 | consumed samples: 1512960 | consumed tokens: 3098542080 | elapsed time per iteration (s): 0.31 | learning rate: 1.702E-04 | global batch size: 256 | lm loss: 2.752152E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.656 | TFLOPs: 29.36 | +7: iteration 5920/ 21553 | consumed samples: 1515520 | consumed tokens: 3103784960 | elapsed time per iteration (s): 0.31 | learning rate: 1.701E-04 | global batch size: 256 | lm loss: 2.772176E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.874 | TFLOPs: 28.84 | +7: iteration 5930/ 21553 | consumed samples: 1518080 | consumed tokens: 3109027840 | elapsed time per iteration (s): 0.30 | learning rate: 1.700E-04 | global batch size: 256 | lm loss: 2.749955E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.869 | TFLOPs: 29.75 | +7: iteration 5940/ 21553 | consumed samples: 1520640 | consumed tokens: 3114270720 | elapsed time per iteration (s): 0.30 | learning rate: 1.699E-04 | global batch size: 256 | lm loss: 2.754134E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.758 | TFLOPs: 29.47 | +7: iteration 5950/ 21553 | consumed samples: 1523200 | consumed tokens: 3119513600 | elapsed time per iteration (s): 0.31 | learning rate: 1.698E-04 | global batch size: 256 | lm loss: 2.762892E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.337 | TFLOPs: 28.93 | +7: iteration 5960/ 21553 | consumed samples: 1525760 | consumed tokens: 3124756480 | elapsed time per iteration (s): 0.31 | learning rate: 1.697E-04 | global batch size: 256 | lm loss: 2.760122E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.267 | TFLOPs: 29.35 | +7: iteration 5970/ 21553 | consumed samples: 1528320 | consumed tokens: 3129999360 | elapsed time per iteration (s): 0.30 | learning rate: 1.696E-04 | global batch size: 256 | lm loss: 2.797779E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.445 | TFLOPs: 29.74 | +7: iteration 5980/ 21553 | consumed samples: 1530880 | consumed tokens: 3135242240 | elapsed time per iteration (s): 0.30 | learning rate: 1.695E-04 | global batch size: 256 | lm loss: 2.759071E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.924 | TFLOPs: 29.82 | +7: iteration 5990/ 21553 | consumed samples: 1533440 | consumed tokens: 3140485120 | elapsed time per iteration (s): 0.31 | learning rate: 1.694E-04 | global batch size: 256 | lm loss: 2.766663E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.107 | TFLOPs: 29.37 | +0: [2022-11-25 18:03:26,009] [INFO] [logging.py:68:log_dist] [Rank 0] step=6000, skipped=0, lr=[0.00016928570742907802, 0.00016928570742907802, 0.00016928570742907802], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 6000/ 21553 | consumed samples: 1536000 | consumed tokens: 3145728000 | elapsed time per iteration (s): 0.31 | learning rate: 1.693E-04 | global batch size: 256 | lm loss: 2.753139E+00 | grad norm: 0.340 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.938 | TFLOPs: 28.84 | +0: steps: 6000 loss: 2.7654 iter time (s): 0.304 samples/sec: 840.908 +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 6000 | lm loss value: 2.760461E+00 | lm loss PPL: 1.580713E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 6000 to checkpoints_146m +0: [2022-11-25 18:03:26,130] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step6000 is begin to save! +0: [2022-11-25 18:03:26,134] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:03:26,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:03:26,218] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:03:26,234] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:03:26,234] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:03:26,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:03:26,249] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:03:26,264] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:03:26,264] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:03:26,279] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:03:26,279] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:03:26,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:03:26,294] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:03:26,309] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:03:26,309] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:03:26,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:03:26,325] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:03:26,340] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:03:26,340] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:03:26,354] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:03:26,355] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:03:26,369] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:03:26,369] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:03:26,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:03:26,384] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:03:26,399] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:03:26,399] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:03:26,413] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:03:26,414] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:03:26,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:03:26,428] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:03:26,443] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:03:26,443] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:03:26,444] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:03:26,445] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step6000/mp_rank_00_model_states.pt +0: [2022-11-25 18:03:26,445] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:03:26,447] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:03:26,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:03:26,497] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,497] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,497] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,498] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,498] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,498] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,499] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,499] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,499] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,499] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,500] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,500] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,501] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,501] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,501] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,501] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,502] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,502] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,502] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,502] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,503] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,504] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,504] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,505] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,505] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,505] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,505] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,505] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,506] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,506] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,507] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,507] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,507] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,508] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,508] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,508] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,508] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,508] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2022-11-25 18:03:26,508] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,508] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,508] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2022-11-25 18:03:26,509] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:03:26,510] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:03:26,510] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,510] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,511] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,511] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,511] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,511] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,511] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,512] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,512] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,512] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,513] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,513] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:03:26,514] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:03:26,514] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,516] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,516] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,516] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:03:26,516] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2022-11-25 18:03:26,516] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:03:26,516] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,513] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,513] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,513] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,514] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:03:26,514] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:03:26,514] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:03:26,526] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,526] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2022-11-25 18:03:26,527] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:03:26,527] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,533] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,533] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2022-11-25 18:03:26,538] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:03:26,538] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:03:26,538] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,556] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,556] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,556] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2022-11-25 18:03:26,564] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:03:26,565] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:03:26,565] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: successfully saved checkpoint at iteration 6000 to checkpoints_146m +7: time (ms) | save-checkpoint: 439.18 +7: iteration 6010/ 21553 | consumed samples: 1538560 | consumed tokens: 3150970880 | elapsed time per iteration (s): 0.36 | learning rate: 1.692E-04 | global batch size: 256 | lm loss: 2.777506E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 708.740 | TFLOPs: 24.81 | +7: iteration 6020/ 21553 | consumed samples: 1541120 | consumed tokens: 3156213760 | elapsed time per iteration (s): 0.32 | learning rate: 1.691E-04 | global batch size: 256 | lm loss: 2.741433E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.652 | TFLOPs: 27.92 | +7: iteration 6030/ 21553 | consumed samples: 1543680 | consumed tokens: 3161456640 | elapsed time per iteration (s): 0.32 | learning rate: 1.690E-04 | global batch size: 256 | lm loss: 2.736870E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.365 | TFLOPs: 28.30 | +7: iteration 6040/ 21553 | consumed samples: 1546240 | consumed tokens: 3166699520 | elapsed time per iteration (s): 0.31 | learning rate: 1.689E-04 | global batch size: 256 | lm loss: 2.783573E+00 | grad norm: 0.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.479 | TFLOPs: 29.35 | +7: iteration 6050/ 21553 | consumed samples: 1548800 | consumed tokens: 3171942400 | elapsed time per iteration (s): 0.31 | learning rate: 1.688E-04 | global batch size: 256 | lm loss: 2.727284E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.783 | TFLOPs: 29.05 | +7: iteration 6060/ 21553 | consumed samples: 1551360 | consumed tokens: 3177185280 | elapsed time per iteration (s): 0.30 | learning rate: 1.687E-04 | global batch size: 256 | lm loss: 2.737748E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.929 | TFLOPs: 29.96 | +7: iteration 6070/ 21553 | consumed samples: 1553920 | consumed tokens: 3182428160 | elapsed time per iteration (s): 0.30 | learning rate: 1.686E-04 | global batch size: 256 | lm loss: 2.772784E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.689 | TFLOPs: 29.43 | +7: iteration 6080/ 21553 | consumed samples: 1556480 | consumed tokens: 3187671040 | elapsed time per iteration (s): 0.30 | learning rate: 1.685E-04 | global batch size: 256 | lm loss: 2.762963E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.082 | TFLOPs: 29.83 | +7: iteration 6090/ 21553 | consumed samples: 1559040 | consumed tokens: 3192913920 | elapsed time per iteration (s): 0.30 | learning rate: 1.684E-04 | global batch size: 256 | lm loss: 2.741786E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.672 | TFLOPs: 29.53 | +7: iteration 6100/ 21553 | consumed samples: 1561600 | consumed tokens: 3198156800 | elapsed time per iteration (s): 0.32 | learning rate: 1.683E-04 | global batch size: 256 | lm loss: 2.765545E+00 | grad norm: 0.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.460 | TFLOPs: 28.34 | +7: iteration 6110/ 21553 | consumed samples: 1564160 | consumed tokens: 3203399680 | elapsed time per iteration (s): 0.31 | learning rate: 1.682E-04 | global batch size: 256 | lm loss: 2.732798E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.775 | TFLOPs: 29.15 | +7: iteration 6120/ 21553 | consumed samples: 1566720 | consumed tokens: 3208642560 | elapsed time per iteration (s): 0.31 | learning rate: 1.681E-04 | global batch size: 256 | lm loss: 2.713907E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.991 | TFLOPs: 29.37 | +7: iteration 6130/ 21553 | consumed samples: 1569280 | consumed tokens: 3213885440 | elapsed time per iteration (s): 0.30 | learning rate: 1.680E-04 | global batch size: 256 | lm loss: 2.750328E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.687 | TFLOPs: 29.71 | +7: iteration 6140/ 21553 | consumed samples: 1571840 | consumed tokens: 3219128320 | elapsed time per iteration (s): 0.31 | learning rate: 1.679E-04 | global batch size: 256 | lm loss: 2.736034E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.433 | TFLOPs: 29.11 | +7: iteration 6150/ 21553 | consumed samples: 1574400 | consumed tokens: 3224371200 | elapsed time per iteration (s): 0.30 | learning rate: 1.678E-04 | global batch size: 256 | lm loss: 2.738821E+00 | grad norm: 0.341 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.901 | TFLOPs: 29.72 | +7: iteration 6160/ 21553 | consumed samples: 1576960 | consumed tokens: 3229614080 | elapsed time per iteration (s): 0.31 | learning rate: 1.677E-04 | global batch size: 256 | lm loss: 2.727446E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.587 | TFLOPs: 29.18 | +7: iteration 6170/ 21553 | consumed samples: 1579520 | consumed tokens: 3234856960 | elapsed time per iteration (s): 0.30 | learning rate: 1.676E-04 | global batch size: 256 | lm loss: 2.736213E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.482 | TFLOPs: 29.84 | +7: iteration 6180/ 21553 | consumed samples: 1582080 | consumed tokens: 3240099840 | elapsed time per iteration (s): 0.30 | learning rate: 1.675E-04 | global batch size: 256 | lm loss: 2.723096E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.317 | TFLOPs: 29.49 | +7: iteration 6190/ 21553 | consumed samples: 1584640 | consumed tokens: 3245342720 | elapsed time per iteration (s): 0.30 | learning rate: 1.674E-04 | global batch size: 256 | lm loss: 2.758098E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.342 | TFLOPs: 29.66 | +7: iteration 6200/ 21553 | consumed samples: 1587200 | consumed tokens: 3250585600 | elapsed time per iteration (s): 0.31 | learning rate: 1.673E-04 | global batch size: 256 | lm loss: 2.744494E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.801 | TFLOPs: 29.19 | +7: iteration 6210/ 21553 | consumed samples: 1589760 | consumed tokens: 3255828480 | elapsed time per iteration (s): 0.30 | learning rate: 1.672E-04 | global batch size: 256 | lm loss: 2.779990E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.601 | TFLOPs: 29.64 | +7: iteration 6220/ 21553 | consumed samples: 1592320 | consumed tokens: 3261071360 | elapsed time per iteration (s): 0.30 | learning rate: 1.671E-04 | global batch size: 256 | lm loss: 2.756059E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.184 | TFLOPs: 29.87 | +7: iteration 6230/ 21553 | consumed samples: 1594880 | consumed tokens: 3266314240 | elapsed time per iteration (s): 0.30 | learning rate: 1.670E-04 | global batch size: 256 | lm loss: 2.772829E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.689 | TFLOPs: 29.40 | +7: iteration 6240/ 21553 | consumed samples: 1597440 | consumed tokens: 3271557120 | elapsed time per iteration (s): 0.30 | learning rate: 1.669E-04 | global batch size: 256 | lm loss: 2.769422E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.691 | TFLOPs: 29.92 | +7: iteration 6250/ 21553 | consumed samples: 1600000 | consumed tokens: 3276800000 | elapsed time per iteration (s): 0.30 | learning rate: 1.668E-04 | global batch size: 256 | lm loss: 2.727671E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.443 | TFLOPs: 29.88 | +7: iteration 6260/ 21553 | consumed samples: 1602560 | consumed tokens: 3282042880 | elapsed time per iteration (s): 0.30 | learning rate: 1.667E-04 | global batch size: 256 | lm loss: 2.763259E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.542 | TFLOPs: 29.64 | +7: iteration 6270/ 21553 | consumed samples: 1605120 | consumed tokens: 3287285760 | elapsed time per iteration (s): 0.30 | learning rate: 1.665E-04 | global batch size: 256 | lm loss: 2.733524E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.473 | TFLOPs: 29.77 | +7: iteration 6280/ 21553 | consumed samples: 1607680 | consumed tokens: 3292528640 | elapsed time per iteration (s): 0.30 | learning rate: 1.664E-04 | global batch size: 256 | lm loss: 2.730527E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.785 | TFLOPs: 29.78 | +7: iteration 6290/ 21553 | consumed samples: 1610240 | consumed tokens: 3297771520 | elapsed time per iteration (s): 0.30 | learning rate: 1.663E-04 | global batch size: 256 | lm loss: 2.723451E+00 | grad norm: 0.357 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.248 | TFLOPs: 29.76 | +7: iteration 6300/ 21553 | consumed samples: 1612800 | consumed tokens: 3303014400 | elapsed time per iteration (s): 0.30 | learning rate: 1.662E-04 | global batch size: 256 | lm loss: 2.733519E+00 | grad norm: 0.342 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.088 | TFLOPs: 30.14 | +7: iteration 6310/ 21553 | consumed samples: 1615360 | consumed tokens: 3308257280 | elapsed time per iteration (s): 0.31 | learning rate: 1.661E-04 | global batch size: 256 | lm loss: 2.719169E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.000 | TFLOPs: 28.67 | +7: iteration 6320/ 21553 | consumed samples: 1617920 | consumed tokens: 3313500160 | elapsed time per iteration (s): 0.31 | learning rate: 1.660E-04 | global batch size: 256 | lm loss: 2.741725E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.944 | TFLOPs: 28.88 | +7: iteration 6330/ 21553 | consumed samples: 1620480 | consumed tokens: 3318743040 | elapsed time per iteration (s): 0.30 | learning rate: 1.659E-04 | global batch size: 256 | lm loss: 2.773892E+00 | grad norm: 0.350 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.184 | TFLOPs: 29.45 | +7: iteration 6340/ 21553 | consumed samples: 1623040 | consumed tokens: 3323985920 | elapsed time per iteration (s): 0.30 | learning rate: 1.658E-04 | global batch size: 256 | lm loss: 2.700788E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.690 | TFLOPs: 29.89 | +7: iteration 6350/ 21553 | consumed samples: 1625600 | consumed tokens: 3329228800 | elapsed time per iteration (s): 0.32 | learning rate: 1.657E-04 | global batch size: 256 | lm loss: 2.740028E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 805.548 | TFLOPs: 28.20 | +7: iteration 6360/ 21553 | consumed samples: 1628160 | consumed tokens: 3334471680 | elapsed time per iteration (s): 0.31 | learning rate: 1.656E-04 | global batch size: 256 | lm loss: 2.729502E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.096 | TFLOPs: 28.95 | +7: iteration 6370/ 21553 | consumed samples: 1630720 | consumed tokens: 3339714560 | elapsed time per iteration (s): 0.31 | learning rate: 1.655E-04 | global batch size: 256 | lm loss: 2.754872E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.478 | TFLOPs: 29.32 | +7: iteration 6380/ 21553 | consumed samples: 1633280 | consumed tokens: 3344957440 | elapsed time per iteration (s): 0.30 | learning rate: 1.654E-04 | global batch size: 256 | lm loss: 2.719925E+00 | grad norm: 0.346 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.154 | TFLOPs: 29.52 | +7: iteration 6390/ 21553 | consumed samples: 1635840 | consumed tokens: 3350200320 | elapsed time per iteration (s): 0.30 | learning rate: 1.653E-04 | global batch size: 256 | lm loss: 2.730307E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.489 | TFLOPs: 29.98 | +7: iteration 6400/ 21553 | consumed samples: 1638400 | consumed tokens: 3355443200 | elapsed time per iteration (s): 0.32 | learning rate: 1.652E-04 | global batch size: 256 | lm loss: 2.730278E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 795.950 | TFLOPs: 27.86 | +7: iteration 6410/ 21553 | consumed samples: 1640960 | consumed tokens: 3360686080 | elapsed time per iteration (s): 0.30 | learning rate: 1.651E-04 | global batch size: 256 | lm loss: 2.755429E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.052 | TFLOPs: 29.86 | +7: iteration 6420/ 21553 | consumed samples: 1643520 | consumed tokens: 3365928960 | elapsed time per iteration (s): 0.30 | learning rate: 1.650E-04 | global batch size: 256 | lm loss: 2.709785E+00 | grad norm: 0.369 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.500 | TFLOPs: 29.88 | +7: iteration 6430/ 21553 | consumed samples: 1646080 | consumed tokens: 3371171840 | elapsed time per iteration (s): 0.30 | learning rate: 1.649E-04 | global batch size: 256 | lm loss: 2.731415E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.529 | TFLOPs: 30.12 | +7: iteration 6440/ 21553 | consumed samples: 1648640 | consumed tokens: 3376414720 | elapsed time per iteration (s): 0.30 | learning rate: 1.648E-04 | global batch size: 256 | lm loss: 2.736548E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.616 | TFLOPs: 30.02 | +7: iteration 6450/ 21553 | consumed samples: 1651200 | consumed tokens: 3381657600 | elapsed time per iteration (s): 0.31 | learning rate: 1.647E-04 | global batch size: 256 | lm loss: 2.728690E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.244 | TFLOPs: 28.75 | +7: iteration 6460/ 21553 | consumed samples: 1653760 | consumed tokens: 3386900480 | elapsed time per iteration (s): 0.30 | learning rate: 1.646E-04 | global batch size: 256 | lm loss: 2.692524E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.033 | TFLOPs: 29.58 | +7: iteration 6470/ 21553 | consumed samples: 1656320 | consumed tokens: 3392143360 | elapsed time per iteration (s): 0.30 | learning rate: 1.645E-04 | global batch size: 256 | lm loss: 2.711097E+00 | grad norm: 0.339 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.323 | TFLOPs: 29.59 | +7: iteration 6480/ 21553 | consumed samples: 1658880 | consumed tokens: 3397386240 | elapsed time per iteration (s): 0.32 | learning rate: 1.644E-04 | global batch size: 256 | lm loss: 2.741728E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.757 | TFLOPs: 28.35 | +7: iteration 6490/ 21553 | consumed samples: 1661440 | consumed tokens: 3402629120 | elapsed time per iteration (s): 0.30 | learning rate: 1.643E-04 | global batch size: 256 | lm loss: 2.721844E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.123 | TFLOPs: 29.52 | +7: iteration 6500/ 21553 | consumed samples: 1664000 | consumed tokens: 3407872000 | elapsed time per iteration (s): 0.31 | learning rate: 1.641E-04 | global batch size: 256 | lm loss: 2.698865E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.463 | TFLOPs: 29.32 | +7: iteration 6510/ 21553 | consumed samples: 1666560 | consumed tokens: 3413114880 | elapsed time per iteration (s): 0.30 | learning rate: 1.640E-04 | global batch size: 256 | lm loss: 2.731309E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.385 | TFLOPs: 29.56 | +7: iteration 6520/ 21553 | consumed samples: 1669120 | consumed tokens: 3418357760 | elapsed time per iteration (s): 0.31 | learning rate: 1.639E-04 | global batch size: 256 | lm loss: 2.742642E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.957 | TFLOPs: 28.81 | +7: iteration 6530/ 21553 | consumed samples: 1671680 | consumed tokens: 3423600640 | elapsed time per iteration (s): 0.31 | learning rate: 1.638E-04 | global batch size: 256 | lm loss: 2.705332E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.474 | TFLOPs: 29.25 | +7: iteration 6540/ 21553 | consumed samples: 1674240 | consumed tokens: 3428843520 | elapsed time per iteration (s): 0.30 | learning rate: 1.637E-04 | global batch size: 256 | lm loss: 2.711634E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.412 | TFLOPs: 29.53 | +7: iteration 6550/ 21553 | consumed samples: 1676800 | consumed tokens: 3434086400 | elapsed time per iteration (s): 0.31 | learning rate: 1.636E-04 | global batch size: 256 | lm loss: 2.743453E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.647 | TFLOPs: 28.87 | +7: iteration 6560/ 21553 | consumed samples: 1679360 | consumed tokens: 3439329280 | elapsed time per iteration (s): 0.31 | learning rate: 1.635E-04 | global batch size: 256 | lm loss: 2.727075E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.829 | TFLOPs: 29.26 | +7: iteration 6570/ 21553 | consumed samples: 1681920 | consumed tokens: 3444572160 | elapsed time per iteration (s): 0.31 | learning rate: 1.634E-04 | global batch size: 256 | lm loss: 2.727063E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.601 | TFLOPs: 29.11 | +7: iteration 6580/ 21553 | consumed samples: 1684480 | consumed tokens: 3449815040 | elapsed time per iteration (s): 0.30 | learning rate: 1.633E-04 | global batch size: 256 | lm loss: 2.737214E+00 | grad norm: 0.343 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.418 | TFLOPs: 30.12 | +7: iteration 6590/ 21553 | consumed samples: 1687040 | consumed tokens: 3455057920 | elapsed time per iteration (s): 0.34 | learning rate: 1.632E-04 | global batch size: 256 | lm loss: 2.726125E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 750.368 | TFLOPs: 26.27 | +7: iteration 6600/ 21553 | consumed samples: 1689600 | consumed tokens: 3460300800 | elapsed time per iteration (s): 0.30 | learning rate: 1.631E-04 | global batch size: 256 | lm loss: 2.724899E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.110 | TFLOPs: 29.51 | +7: iteration 6610/ 21553 | consumed samples: 1692160 | consumed tokens: 3465543680 | elapsed time per iteration (s): 0.30 | learning rate: 1.630E-04 | global batch size: 256 | lm loss: 2.728834E+00 | grad norm: 0.340 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.324 | TFLOPs: 29.59 | +7: iteration 6620/ 21553 | consumed samples: 1694720 | consumed tokens: 3470786560 | elapsed time per iteration (s): 0.30 | learning rate: 1.629E-04 | global batch size: 256 | lm loss: 2.701738E+00 | grad norm: 0.349 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.971 | TFLOPs: 29.62 | +7: iteration 6630/ 21553 | consumed samples: 1697280 | consumed tokens: 3476029440 | elapsed time per iteration (s): 0.31 | learning rate: 1.628E-04 | global batch size: 256 | lm loss: 2.732640E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.646 | TFLOPs: 28.90 | +7: iteration 6640/ 21553 | consumed samples: 1699840 | consumed tokens: 3481272320 | elapsed time per iteration (s): 0.30 | learning rate: 1.627E-04 | global batch size: 256 | lm loss: 2.702175E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.289 | TFLOPs: 29.66 | +7: iteration 6650/ 21553 | consumed samples: 1702400 | consumed tokens: 3486515200 | elapsed time per iteration (s): 0.30 | learning rate: 1.625E-04 | global batch size: 256 | lm loss: 2.690186E+00 | grad norm: 0.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.467 | TFLOPs: 29.74 | +7: iteration 6660/ 21553 | consumed samples: 1704960 | consumed tokens: 3491758080 | elapsed time per iteration (s): 0.30 | learning rate: 1.624E-04 | global batch size: 256 | lm loss: 2.745835E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.800 | TFLOPs: 29.64 | +7: iteration 6670/ 21553 | consumed samples: 1707520 | consumed tokens: 3497000960 | elapsed time per iteration (s): 0.30 | learning rate: 1.623E-04 | global batch size: 256 | lm loss: 2.708121E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.701 | TFLOPs: 29.57 | +7: iteration 6680/ 21553 | consumed samples: 1710080 | consumed tokens: 3502243840 | elapsed time per iteration (s): 0.31 | learning rate: 1.622E-04 | global batch size: 256 | lm loss: 2.702102E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.887 | TFLOPs: 29.05 | +7: iteration 6690/ 21553 | consumed samples: 1712640 | consumed tokens: 3507486720 | elapsed time per iteration (s): 0.30 | learning rate: 1.621E-04 | global batch size: 256 | lm loss: 2.719316E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.926 | TFLOPs: 29.54 | +7: iteration 6700/ 21553 | consumed samples: 1715200 | consumed tokens: 3512729600 | elapsed time per iteration (s): 0.30 | learning rate: 1.620E-04 | global batch size: 256 | lm loss: 2.727728E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.022 | TFLOPs: 29.76 | +7: iteration 6710/ 21553 | consumed samples: 1717760 | consumed tokens: 3517972480 | elapsed time per iteration (s): 0.30 | learning rate: 1.619E-04 | global batch size: 256 | lm loss: 2.742569E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.435 | TFLOPs: 29.56 | +7: iteration 6720/ 21553 | consumed samples: 1720320 | consumed tokens: 3523215360 | elapsed time per iteration (s): 0.30 | learning rate: 1.618E-04 | global batch size: 256 | lm loss: 2.698251E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.734 | TFLOPs: 29.43 | +7: iteration 6730/ 21553 | consumed samples: 1722880 | consumed tokens: 3528458240 | elapsed time per iteration (s): 0.30 | learning rate: 1.617E-04 | global batch size: 256 | lm loss: 2.743829E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.952 | TFLOPs: 29.72 | +7: iteration 6740/ 21553 | consumed samples: 1725440 | consumed tokens: 3533701120 | elapsed time per iteration (s): 0.31 | learning rate: 1.616E-04 | global batch size: 256 | lm loss: 2.719897E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.244 | TFLOPs: 29.31 | +7: iteration 6750/ 21553 | consumed samples: 1728000 | consumed tokens: 3538944000 | elapsed time per iteration (s): 0.30 | learning rate: 1.615E-04 | global batch size: 256 | lm loss: 2.698308E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.610 | TFLOPs: 29.39 | +7: iteration 6760/ 21553 | consumed samples: 1730560 | consumed tokens: 3544186880 | elapsed time per iteration (s): 0.31 | learning rate: 1.614E-04 | global batch size: 256 | lm loss: 2.718095E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.831 | TFLOPs: 28.60 | +7: iteration 6770/ 21553 | consumed samples: 1733120 | consumed tokens: 3549429760 | elapsed time per iteration (s): 0.30 | learning rate: 1.612E-04 | global batch size: 256 | lm loss: 2.678422E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.928 | TFLOPs: 29.47 | +7: iteration 6780/ 21553 | consumed samples: 1735680 | consumed tokens: 3554672640 | elapsed time per iteration (s): 0.31 | learning rate: 1.611E-04 | global batch size: 256 | lm loss: 2.691645E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.201 | TFLOPs: 29.27 | +7: iteration 6790/ 21553 | consumed samples: 1738240 | consumed tokens: 3559915520 | elapsed time per iteration (s): 0.31 | learning rate: 1.610E-04 | global batch size: 256 | lm loss: 2.715180E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.040 | TFLOPs: 28.85 | +7: iteration 6800/ 21553 | consumed samples: 1740800 | consumed tokens: 3565158400 | elapsed time per iteration (s): 0.31 | learning rate: 1.609E-04 | global batch size: 256 | lm loss: 2.764677E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.561 | TFLOPs: 29.25 | +7: iteration 6810/ 21553 | consumed samples: 1743360 | consumed tokens: 3570401280 | elapsed time per iteration (s): 0.30 | learning rate: 1.608E-04 | global batch size: 256 | lm loss: 2.706777E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.028 | TFLOPs: 29.41 | +7: iteration 6820/ 21553 | consumed samples: 1745920 | consumed tokens: 3575644160 | elapsed time per iteration (s): 0.31 | learning rate: 1.607E-04 | global batch size: 256 | lm loss: 2.708967E+00 | grad norm: 0.333 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.180 | TFLOPs: 29.06 | +7: iteration 6830/ 21553 | consumed samples: 1748480 | consumed tokens: 3580887040 | elapsed time per iteration (s): 0.31 | learning rate: 1.606E-04 | global batch size: 256 | lm loss: 2.726594E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.170 | TFLOPs: 29.34 | +7: iteration 6840/ 21553 | consumed samples: 1751040 | consumed tokens: 3586129920 | elapsed time per iteration (s): 0.30 | learning rate: 1.605E-04 | global batch size: 256 | lm loss: 2.715306E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.271 | TFLOPs: 29.42 | +7: iteration 6850/ 21553 | consumed samples: 1753600 | consumed tokens: 3591372800 | elapsed time per iteration (s): 0.31 | learning rate: 1.604E-04 | global batch size: 256 | lm loss: 2.712015E+00 | grad norm: 0.335 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.321 | TFLOPs: 28.68 | +7: iteration 6860/ 21553 | consumed samples: 1756160 | consumed tokens: 3596615680 | elapsed time per iteration (s): 0.30 | learning rate: 1.603E-04 | global batch size: 256 | lm loss: 2.693126E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.070 | TFLOPs: 29.51 | +7: iteration 6870/ 21553 | consumed samples: 1758720 | consumed tokens: 3601858560 | elapsed time per iteration (s): 0.31 | learning rate: 1.601E-04 | global batch size: 256 | lm loss: 2.745431E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.429 | TFLOPs: 28.83 | +7: iteration 6880/ 21553 | consumed samples: 1761280 | consumed tokens: 3607101440 | elapsed time per iteration (s): 0.31 | learning rate: 1.600E-04 | global batch size: 256 | lm loss: 2.730573E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.757 | TFLOPs: 29.05 | +7: iteration 6890/ 21553 | consumed samples: 1763840 | consumed tokens: 3612344320 | elapsed time per iteration (s): 0.30 | learning rate: 1.599E-04 | global batch size: 256 | lm loss: 2.664318E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.078 | TFLOPs: 29.86 | +7: iteration 6900/ 21553 | consumed samples: 1766400 | consumed tokens: 3617587200 | elapsed time per iteration (s): 0.31 | learning rate: 1.598E-04 | global batch size: 256 | lm loss: 2.671484E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.448 | TFLOPs: 29.35 | +7: iteration 6910/ 21553 | consumed samples: 1768960 | consumed tokens: 3622830080 | elapsed time per iteration (s): 0.30 | learning rate: 1.597E-04 | global batch size: 256 | lm loss: 2.710876E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.556 | TFLOPs: 29.60 | +7: iteration 6920/ 21553 | consumed samples: 1771520 | consumed tokens: 3628072960 | elapsed time per iteration (s): 0.30 | learning rate: 1.596E-04 | global batch size: 256 | lm loss: 2.705402E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.037 | TFLOPs: 29.44 | +7: iteration 6930/ 21553 | consumed samples: 1774080 | consumed tokens: 3633315840 | elapsed time per iteration (s): 0.30 | learning rate: 1.595E-04 | global batch size: 256 | lm loss: 2.698964E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.946 | TFLOPs: 29.58 | +7: iteration 6940/ 21553 | consumed samples: 1776640 | consumed tokens: 3638558720 | elapsed time per iteration (s): 0.31 | learning rate: 1.594E-04 | global batch size: 256 | lm loss: 2.720709E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.508 | TFLOPs: 28.69 | +7: iteration 6950/ 21553 | consumed samples: 1779200 | consumed tokens: 3643801600 | elapsed time per iteration (s): 0.31 | learning rate: 1.593E-04 | global batch size: 256 | lm loss: 2.695802E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.660 | TFLOPs: 29.25 | +7: iteration 6960/ 21553 | consumed samples: 1781760 | consumed tokens: 3649044480 | elapsed time per iteration (s): 0.31 | learning rate: 1.592E-04 | global batch size: 256 | lm loss: 2.741939E+00 | grad norm: 0.378 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.884 | TFLOPs: 28.91 | +7: iteration 6970/ 21553 | consumed samples: 1784320 | consumed tokens: 3654287360 | elapsed time per iteration (s): 0.30 | learning rate: 1.590E-04 | global batch size: 256 | lm loss: 2.700734E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.381 | TFLOPs: 29.77 | +7: iteration 6980/ 21553 | consumed samples: 1786880 | consumed tokens: 3659530240 | elapsed time per iteration (s): 0.30 | learning rate: 1.589E-04 | global batch size: 256 | lm loss: 2.680423E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.818 | TFLOPs: 29.89 | +7: iteration 6990/ 21553 | consumed samples: 1789440 | consumed tokens: 3664773120 | elapsed time per iteration (s): 0.30 | learning rate: 1.588E-04 | global batch size: 256 | lm loss: 2.735939E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.018 | TFLOPs: 29.97 | +7: iteration 7000/ 21553 | consumed samples: 1792000 | consumed tokens: 3670016000 | elapsed time per iteration (s): 0.31 | learning rate: 1.587E-04 | global batch size: 256 | lm loss: 2.719794E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.174 | TFLOPs: 29.34 | +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 7000 | lm loss value: 2.658821E+00 | lm loss PPL: 1.427944E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 7000 to checkpoints_146m +0: [2022-11-25 18:08:32,232] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step7000 is begin to save! +0: [2022-11-25 18:08:32,235] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:08:32,356] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:08:32,356] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:08:32,376] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:08:32,376] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:08:32,391] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:08:32,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:08:32,406] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:08:32,406] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:08:32,421] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:08:32,421] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:08:32,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:08:32,435] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:08:32,450] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:08:32,450] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:08:32,464] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:08:32,465] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:08:32,479] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:08:32,479] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:08:32,494] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:08:32,494] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:08:32,508] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:08:32,508] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:08:32,523] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:08:32,523] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:08:32,538] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:08:32,538] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:08:32,552] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:08:32,553] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:08:32,567] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:08:32,567] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:08:32,582] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:08:32,582] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:08:32,583] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:08:32,584] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step7000/mp_rank_00_model_states.pt +0: [2022-11-25 18:08:32,584] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:08:32,586] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:08:32,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:08:32,635] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,635] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,635] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2022-11-25 18:08:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:08:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 18:08:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2022-11-25 18:08:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,651] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:08:32,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +7: [2022-11-25 18:08:32,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2022-11-25 18:08:32,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2022-11-25 18:08:32,655] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:08:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:08:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,660] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,660] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:08:32,660] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,660] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,660] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,660] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2022-11-25 18:08:32,664] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:08:32,664] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: successfully saved checkpoint at iteration 7000 to checkpoints_146m +7: time (ms) | save-checkpoint: 436.27 +7: iteration 7010/ 21553 | consumed samples: 1794560 | consumed tokens: 3675258880 | elapsed time per iteration (s): 0.37 | learning rate: 1.586E-04 | global batch size: 256 | lm loss: 2.693987E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 689.980 | TFLOPs: 24.15 | +7: iteration 7020/ 21553 | consumed samples: 1797120 | consumed tokens: 3680501760 | elapsed time per iteration (s): 0.30 | learning rate: 1.585E-04 | global batch size: 256 | lm loss: 2.746626E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.119 | TFLOPs: 29.83 | +7: iteration 7030/ 21553 | consumed samples: 1799680 | consumed tokens: 3685744640 | elapsed time per iteration (s): 0.31 | learning rate: 1.584E-04 | global batch size: 256 | lm loss: 2.683045E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.292 | TFLOPs: 29.14 | +7: iteration 7040/ 21553 | consumed samples: 1802240 | consumed tokens: 3690987520 | elapsed time per iteration (s): 0.31 | learning rate: 1.583E-04 | global batch size: 256 | lm loss: 2.724475E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.863 | TFLOPs: 28.77 | +7: iteration 7050/ 21553 | consumed samples: 1804800 | consumed tokens: 3696230400 | elapsed time per iteration (s): 0.31 | learning rate: 1.582E-04 | global batch size: 256 | lm loss: 2.722938E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.853 | TFLOPs: 28.98 | +7: iteration 7060/ 21553 | consumed samples: 1807360 | consumed tokens: 3701473280 | elapsed time per iteration (s): 0.30 | learning rate: 1.580E-04 | global batch size: 256 | lm loss: 2.693465E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.130 | TFLOPs: 29.45 | +7: iteration 7070/ 21553 | consumed samples: 1809920 | consumed tokens: 3706716160 | elapsed time per iteration (s): 0.30 | learning rate: 1.579E-04 | global batch size: 256 | lm loss: 2.710551E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.414 | TFLOPs: 29.77 | +7: iteration 7080/ 21553 | consumed samples: 1812480 | consumed tokens: 3711959040 | elapsed time per iteration (s): 0.34 | learning rate: 1.578E-04 | global batch size: 256 | lm loss: 2.691223E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 753.410 | TFLOPs: 26.37 | +7: iteration 7090/ 21553 | consumed samples: 1815040 | consumed tokens: 3717201920 | elapsed time per iteration (s): 0.30 | learning rate: 1.577E-04 | global batch size: 256 | lm loss: 2.714906E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.994 | TFLOPs: 29.69 | +7: iteration 7100/ 21553 | consumed samples: 1817600 | consumed tokens: 3722444800 | elapsed time per iteration (s): 0.30 | learning rate: 1.576E-04 | global batch size: 256 | lm loss: 2.676307E+00 | grad norm: 0.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.842 | TFLOPs: 29.47 | +7: iteration 7110/ 21553 | consumed samples: 1820160 | consumed tokens: 3727687680 | elapsed time per iteration (s): 0.30 | learning rate: 1.575E-04 | global batch size: 256 | lm loss: 2.736167E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.170 | TFLOPs: 29.41 | +7: iteration 7120/ 21553 | consumed samples: 1822720 | consumed tokens: 3732930560 | elapsed time per iteration (s): 0.30 | learning rate: 1.574E-04 | global batch size: 256 | lm loss: 2.698357E+00 | grad norm: 0.345 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.997 | TFLOPs: 29.51 | +7: iteration 7130/ 21553 | consumed samples: 1825280 | consumed tokens: 3738173440 | elapsed time per iteration (s): 0.30 | learning rate: 1.573E-04 | global batch size: 256 | lm loss: 2.719727E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.521 | TFLOPs: 29.67 | +7: iteration 7140/ 21553 | consumed samples: 1827840 | consumed tokens: 3743416320 | elapsed time per iteration (s): 0.31 | learning rate: 1.571E-04 | global batch size: 256 | lm loss: 2.697690E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.482 | TFLOPs: 29.25 | +7: iteration 7150/ 21553 | consumed samples: 1830400 | consumed tokens: 3748659200 | elapsed time per iteration (s): 0.30 | learning rate: 1.570E-04 | global batch size: 256 | lm loss: 2.704247E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.071 | TFLOPs: 29.55 | +7: iteration 7160/ 21553 | consumed samples: 1832960 | consumed tokens: 3753902080 | elapsed time per iteration (s): 0.31 | learning rate: 1.569E-04 | global batch size: 256 | lm loss: 2.729508E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.639 | TFLOPs: 28.52 | +7: iteration 7170/ 21553 | consumed samples: 1835520 | consumed tokens: 3759144960 | elapsed time per iteration (s): 0.30 | learning rate: 1.568E-04 | global batch size: 256 | lm loss: 2.730865E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.419 | TFLOPs: 29.53 | +7: iteration 7180/ 21553 | consumed samples: 1838080 | consumed tokens: 3764387840 | elapsed time per iteration (s): 0.31 | learning rate: 1.567E-04 | global batch size: 256 | lm loss: 2.711782E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.588 | TFLOPs: 29.11 | +7: iteration 7190/ 21553 | consumed samples: 1840640 | consumed tokens: 3769630720 | elapsed time per iteration (s): 0.30 | learning rate: 1.566E-04 | global batch size: 256 | lm loss: 2.682850E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.174 | TFLOPs: 29.62 | +7: iteration 7200/ 21553 | consumed samples: 1843200 | consumed tokens: 3774873600 | elapsed time per iteration (s): 0.31 | learning rate: 1.565E-04 | global batch size: 256 | lm loss: 2.690345E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.344 | TFLOPs: 29.24 | +7: iteration 7210/ 21553 | consumed samples: 1845760 | consumed tokens: 3780116480 | elapsed time per iteration (s): 0.30 | learning rate: 1.563E-04 | global batch size: 256 | lm loss: 2.707516E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.115 | TFLOPs: 29.69 | +7: iteration 7220/ 21553 | consumed samples: 1848320 | consumed tokens: 3785359360 | elapsed time per iteration (s): 0.30 | learning rate: 1.562E-04 | global batch size: 256 | lm loss: 2.701301E+00 | grad norm: 0.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.157 | TFLOPs: 29.66 | +7: iteration 7230/ 21553 | consumed samples: 1850880 | consumed tokens: 3790602240 | elapsed time per iteration (s): 0.30 | learning rate: 1.561E-04 | global batch size: 256 | lm loss: 2.711181E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.712 | TFLOPs: 29.64 | +7: iteration 7240/ 21553 | consumed samples: 1853440 | consumed tokens: 3795845120 | elapsed time per iteration (s): 0.30 | learning rate: 1.560E-04 | global batch size: 256 | lm loss: 2.720767E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.851 | TFLOPs: 29.79 | +7: iteration 7250/ 21553 | consumed samples: 1856000 | consumed tokens: 3801088000 | elapsed time per iteration (s): 0.30 | learning rate: 1.559E-04 | global batch size: 256 | lm loss: 2.734970E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.885 | TFLOPs: 29.44 | +7: iteration 7260/ 21553 | consumed samples: 1858560 | consumed tokens: 3806330880 | elapsed time per iteration (s): 0.30 | learning rate: 1.558E-04 | global batch size: 256 | lm loss: 2.678557E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.972 | TFLOPs: 29.69 | +7: iteration 7270/ 21553 | consumed samples: 1861120 | consumed tokens: 3811573760 | elapsed time per iteration (s): 0.30 | learning rate: 1.557E-04 | global batch size: 256 | lm loss: 2.719697E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.678 | TFLOPs: 29.57 | +7: iteration 7280/ 21553 | consumed samples: 1863680 | consumed tokens: 3816816640 | elapsed time per iteration (s): 0.31 | learning rate: 1.556E-04 | global batch size: 256 | lm loss: 2.725847E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.138 | TFLOPs: 28.71 | +7: iteration 7290/ 21553 | consumed samples: 1866240 | consumed tokens: 3822059520 | elapsed time per iteration (s): 0.30 | learning rate: 1.554E-04 | global batch size: 256 | lm loss: 2.711536E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.199 | TFLOPs: 29.52 | +7: iteration 7300/ 21553 | consumed samples: 1868800 | consumed tokens: 3827302400 | elapsed time per iteration (s): 0.31 | learning rate: 1.553E-04 | global batch size: 256 | lm loss: 2.720800E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.389 | TFLOPs: 29.24 | +7: iteration 7310/ 21553 | consumed samples: 1871360 | consumed tokens: 3832545280 | elapsed time per iteration (s): 0.30 | learning rate: 1.552E-04 | global batch size: 256 | lm loss: 2.706649E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.085 | TFLOPs: 29.41 | +7: iteration 7320/ 21553 | consumed samples: 1873920 | consumed tokens: 3837788160 | elapsed time per iteration (s): 0.30 | learning rate: 1.551E-04 | global batch size: 256 | lm loss: 2.693125E+00 | grad norm: 0.315 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.638 | TFLOPs: 29.81 | +7: iteration 7330/ 21553 | consumed samples: 1876480 | consumed tokens: 3843031040 | elapsed time per iteration (s): 0.30 | learning rate: 1.550E-04 | global batch size: 256 | lm loss: 2.708990E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.373 | TFLOPs: 29.42 | +7: iteration 7340/ 21553 | consumed samples: 1879040 | consumed tokens: 3848273920 | elapsed time per iteration (s): 0.30 | learning rate: 1.549E-04 | global batch size: 256 | lm loss: 2.675484E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.399 | TFLOPs: 29.42 | +7: iteration 7350/ 21553 | consumed samples: 1881600 | consumed tokens: 3853516800 | elapsed time per iteration (s): 0.30 | learning rate: 1.547E-04 | global batch size: 256 | lm loss: 2.696887E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.358 | TFLOPs: 29.59 | +7: iteration 7360/ 21553 | consumed samples: 1884160 | consumed tokens: 3858759680 | elapsed time per iteration (s): 0.30 | learning rate: 1.546E-04 | global batch size: 256 | lm loss: 2.715413E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.071 | TFLOPs: 29.83 | +7: iteration 7370/ 21553 | consumed samples: 1886720 | consumed tokens: 3864002560 | elapsed time per iteration (s): 0.31 | learning rate: 1.545E-04 | global batch size: 256 | lm loss: 2.696078E+00 | grad norm: 0.325 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.890 | TFLOPs: 29.19 | +7: iteration 7380/ 21553 | consumed samples: 1889280 | consumed tokens: 3869245440 | elapsed time per iteration (s): 0.31 | learning rate: 1.544E-04 | global batch size: 256 | lm loss: 2.700954E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.846 | TFLOPs: 28.98 | +7: iteration 7390/ 21553 | consumed samples: 1891840 | consumed tokens: 3874488320 | elapsed time per iteration (s): 0.30 | learning rate: 1.543E-04 | global batch size: 256 | lm loss: 2.667719E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.781 | TFLOPs: 29.99 | +7: iteration 7400/ 21553 | consumed samples: 1894400 | consumed tokens: 3879731200 | elapsed time per iteration (s): 0.30 | learning rate: 1.542E-04 | global batch size: 256 | lm loss: 2.708618E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.955 | TFLOPs: 29.82 | +7: iteration 7410/ 21553 | consumed samples: 1896960 | consumed tokens: 3884974080 | elapsed time per iteration (s): 0.30 | learning rate: 1.541E-04 | global batch size: 256 | lm loss: 2.716286E+00 | grad norm: 0.322 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.519 | TFLOPs: 29.77 | +7: iteration 7420/ 21553 | consumed samples: 1899520 | consumed tokens: 3890216960 | elapsed time per iteration (s): 0.31 | learning rate: 1.539E-04 | global batch size: 256 | lm loss: 2.666396E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.253 | TFLOPs: 29.27 | +7: iteration 7430/ 21553 | consumed samples: 1902080 | consumed tokens: 3895459840 | elapsed time per iteration (s): 0.30 | learning rate: 1.538E-04 | global batch size: 256 | lm loss: 2.696974E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.954 | TFLOPs: 29.40 | +7: iteration 7440/ 21553 | consumed samples: 1904640 | consumed tokens: 3900702720 | elapsed time per iteration (s): 0.30 | learning rate: 1.537E-04 | global batch size: 256 | lm loss: 2.646191E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.253 | TFLOPs: 29.55 | +7: iteration 7450/ 21553 | consumed samples: 1907200 | consumed tokens: 3905945600 | elapsed time per iteration (s): 0.30 | learning rate: 1.536E-04 | global batch size: 256 | lm loss: 2.695381E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.775 | TFLOPs: 29.64 | +7: iteration 7460/ 21553 | consumed samples: 1909760 | consumed tokens: 3911188480 | elapsed time per iteration (s): 0.30 | learning rate: 1.535E-04 | global batch size: 256 | lm loss: 2.707362E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.801 | TFLOPs: 30.06 | +7: iteration 7470/ 21553 | consumed samples: 1912320 | consumed tokens: 3916431360 | elapsed time per iteration (s): 0.30 | learning rate: 1.534E-04 | global batch size: 256 | lm loss: 2.670322E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.323 | TFLOPs: 29.52 | +7: iteration 7480/ 21553 | consumed samples: 1914880 | consumed tokens: 3921674240 | elapsed time per iteration (s): 0.30 | learning rate: 1.532E-04 | global batch size: 256 | lm loss: 2.670332E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.199 | TFLOPs: 29.48 | +7: iteration 7490/ 21553 | consumed samples: 1917440 | consumed tokens: 3926917120 | elapsed time per iteration (s): 0.32 | learning rate: 1.531E-04 | global batch size: 256 | lm loss: 2.703151E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.565 | TFLOPs: 27.64 | +7: iteration 7500/ 21553 | consumed samples: 1920000 | consumed tokens: 3932160000 | elapsed time per iteration (s): 0.31 | learning rate: 1.530E-04 | global batch size: 256 | lm loss: 2.662816E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.849 | TFLOPs: 28.95 | +7: iteration 7510/ 21553 | consumed samples: 1922560 | consumed tokens: 3937402880 | elapsed time per iteration (s): 0.30 | learning rate: 1.529E-04 | global batch size: 256 | lm loss: 2.670765E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.102 | TFLOPs: 30.04 | +7: iteration 7520/ 21553 | consumed samples: 1925120 | consumed tokens: 3942645760 | elapsed time per iteration (s): 0.36 | learning rate: 1.528E-04 | global batch size: 256 | lm loss: 2.717809E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 717.785 | TFLOPs: 25.13 | +7: iteration 7530/ 21553 | consumed samples: 1927680 | consumed tokens: 3947888640 | elapsed time per iteration (s): 0.32 | learning rate: 1.527E-04 | global batch size: 256 | lm loss: 2.720539E+00 | grad norm: 0.358 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 792.665 | TFLOPs: 27.75 | +7: iteration 7540/ 21553 | consumed samples: 1930240 | consumed tokens: 3953131520 | elapsed time per iteration (s): 0.30 | learning rate: 1.525E-04 | global batch size: 256 | lm loss: 2.703186E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.940 | TFLOPs: 29.65 | +7: iteration 7550/ 21553 | consumed samples: 1932800 | consumed tokens: 3958374400 | elapsed time per iteration (s): 0.30 | learning rate: 1.524E-04 | global batch size: 256 | lm loss: 2.676353E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.676 | TFLOPs: 29.57 | +7: iteration 7560/ 21553 | consumed samples: 1935360 | consumed tokens: 3963617280 | elapsed time per iteration (s): 0.31 | learning rate: 1.523E-04 | global batch size: 256 | lm loss: 2.677600E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.468 | TFLOPs: 29.14 | +7: iteration 7570/ 21553 | consumed samples: 1937920 | consumed tokens: 3968860160 | elapsed time per iteration (s): 0.32 | learning rate: 1.522E-04 | global batch size: 256 | lm loss: 2.690257E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.860 | TFLOPs: 27.93 | +7: iteration 7580/ 21553 | consumed samples: 1940480 | consumed tokens: 3974103040 | elapsed time per iteration (s): 0.30 | learning rate: 1.521E-04 | global batch size: 256 | lm loss: 2.712186E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.543 | TFLOPs: 29.74 | +7: iteration 7590/ 21553 | consumed samples: 1943040 | consumed tokens: 3979345920 | elapsed time per iteration (s): 0.30 | learning rate: 1.520E-04 | global batch size: 256 | lm loss: 2.715435E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.379 | TFLOPs: 29.59 | +7: iteration 7600/ 21553 | consumed samples: 1945600 | consumed tokens: 3984588800 | elapsed time per iteration (s): 0.30 | learning rate: 1.518E-04 | global batch size: 256 | lm loss: 2.681051E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.530 | TFLOPs: 29.81 | +7: iteration 7610/ 21553 | consumed samples: 1948160 | consumed tokens: 3989831680 | elapsed time per iteration (s): 0.30 | learning rate: 1.517E-04 | global batch size: 256 | lm loss: 2.730650E+00 | grad norm: 0.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.276 | TFLOPs: 29.56 | +7: iteration 7620/ 21553 | consumed samples: 1950720 | consumed tokens: 3995074560 | elapsed time per iteration (s): 0.30 | learning rate: 1.516E-04 | global batch size: 256 | lm loss: 2.648528E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.039 | TFLOPs: 29.93 | +7: iteration 7630/ 21553 | consumed samples: 1953280 | consumed tokens: 4000317440 | elapsed time per iteration (s): 0.30 | learning rate: 1.515E-04 | global batch size: 256 | lm loss: 2.672514E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.078 | TFLOPs: 29.65 | +7: iteration 7640/ 21553 | consumed samples: 1955840 | consumed tokens: 4005560320 | elapsed time per iteration (s): 0.30 | learning rate: 1.514E-04 | global batch size: 256 | lm loss: 2.705685E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.705 | TFLOPs: 29.85 | +7: iteration 7650/ 21553 | consumed samples: 1958400 | consumed tokens: 4010803200 | elapsed time per iteration (s): 0.30 | learning rate: 1.513E-04 | global batch size: 256 | lm loss: 2.698215E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.022 | TFLOPs: 29.48 | +7: iteration 7660/ 21553 | consumed samples: 1960960 | consumed tokens: 4016046080 | elapsed time per iteration (s): 0.32 | learning rate: 1.511E-04 | global batch size: 256 | lm loss: 2.651728E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.090 | TFLOPs: 28.39 | +7: iteration 7670/ 21553 | consumed samples: 1963520 | consumed tokens: 4021288960 | elapsed time per iteration (s): 0.31 | learning rate: 1.510E-04 | global batch size: 256 | lm loss: 2.688355E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.301 | TFLOPs: 28.47 | +7: iteration 7680/ 21553 | consumed samples: 1966080 | consumed tokens: 4026531840 | elapsed time per iteration (s): 0.30 | learning rate: 1.509E-04 | global batch size: 256 | lm loss: 2.678186E+00 | grad norm: 0.596 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.294 | TFLOPs: 29.80 | +7: iteration 7690/ 21553 | consumed samples: 1968640 | consumed tokens: 4031774720 | elapsed time per iteration (s): 0.30 | learning rate: 1.508E-04 | global batch size: 256 | lm loss: 2.716399E+00 | grad norm: 0.336 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.613 | TFLOPs: 29.67 | +7: iteration 7700/ 21553 | consumed samples: 1971200 | consumed tokens: 4037017600 | elapsed time per iteration (s): 0.31 | learning rate: 1.507E-04 | global batch size: 256 | lm loss: 2.688729E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.068 | TFLOPs: 28.71 | +7: iteration 7710/ 21553 | consumed samples: 1973760 | consumed tokens: 4042260480 | elapsed time per iteration (s): 0.30 | learning rate: 1.505E-04 | global batch size: 256 | lm loss: 2.685174E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.944 | TFLOPs: 29.65 | +7: iteration 7720/ 21553 | consumed samples: 1976320 | consumed tokens: 4047503360 | elapsed time per iteration (s): 0.30 | learning rate: 1.504E-04 | global batch size: 256 | lm loss: 2.691434E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.286 | TFLOPs: 30.12 | +7: iteration 7730/ 21553 | consumed samples: 1978880 | consumed tokens: 4052746240 | elapsed time per iteration (s): 0.30 | learning rate: 1.503E-04 | global batch size: 256 | lm loss: 2.716007E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.365 | TFLOPs: 29.45 | +7: iteration 7740/ 21553 | consumed samples: 1981440 | consumed tokens: 4057989120 | elapsed time per iteration (s): 0.30 | learning rate: 1.502E-04 | global batch size: 256 | lm loss: 2.693419E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.423 | TFLOPs: 29.84 | +7: iteration 7750/ 21553 | consumed samples: 1984000 | consumed tokens: 4063232000 | elapsed time per iteration (s): 0.30 | learning rate: 1.501E-04 | global batch size: 256 | lm loss: 2.694176E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.052 | TFLOPs: 29.58 | +7: iteration 7760/ 21553 | consumed samples: 1986560 | consumed tokens: 4068474880 | elapsed time per iteration (s): 0.30 | learning rate: 1.500E-04 | global batch size: 256 | lm loss: 2.700196E+00 | grad norm: 0.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.676 | TFLOPs: 29.74 | +7: iteration 7770/ 21553 | consumed samples: 1989120 | consumed tokens: 4073717760 | elapsed time per iteration (s): 0.30 | learning rate: 1.498E-04 | global batch size: 256 | lm loss: 2.686483E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.154 | TFLOPs: 30.04 | +7: iteration 7780/ 21553 | consumed samples: 1991680 | consumed tokens: 4078960640 | elapsed time per iteration (s): 0.31 | learning rate: 1.497E-04 | global batch size: 256 | lm loss: 2.730512E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.799 | TFLOPs: 28.84 | +7: iteration 7790/ 21553 | consumed samples: 1994240 | consumed tokens: 4084203520 | elapsed time per iteration (s): 0.30 | learning rate: 1.496E-04 | global batch size: 256 | lm loss: 2.679520E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.877 | TFLOPs: 29.86 | +7: iteration 7800/ 21553 | consumed samples: 1996800 | consumed tokens: 4089446400 | elapsed time per iteration (s): 0.30 | learning rate: 1.495E-04 | global batch size: 256 | lm loss: 2.678195E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.661 | TFLOPs: 29.39 | +7: iteration 7810/ 21553 | consumed samples: 1999360 | consumed tokens: 4094689280 | elapsed time per iteration (s): 0.30 | learning rate: 1.494E-04 | global batch size: 256 | lm loss: 2.669142E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.852 | TFLOPs: 29.75 | +7: iteration 7820/ 21553 | consumed samples: 2001920 | consumed tokens: 4099932160 | elapsed time per iteration (s): 0.30 | learning rate: 1.492E-04 | global batch size: 256 | lm loss: 2.722357E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.550 | TFLOPs: 29.39 | +7: iteration 7830/ 21553 | consumed samples: 2004480 | consumed tokens: 4105175040 | elapsed time per iteration (s): 0.30 | learning rate: 1.491E-04 | global batch size: 256 | lm loss: 2.670484E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.433 | TFLOPs: 29.53 | +7: iteration 7840/ 21553 | consumed samples: 2007040 | consumed tokens: 4110417920 | elapsed time per iteration (s): 0.31 | learning rate: 1.490E-04 | global batch size: 256 | lm loss: 2.667515E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.069 | TFLOPs: 28.67 | +7: iteration 7850/ 21553 | consumed samples: 2009600 | consumed tokens: 4115660800 | elapsed time per iteration (s): 0.31 | learning rate: 1.489E-04 | global batch size: 256 | lm loss: 2.710969E+00 | grad norm: 0.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.252 | TFLOPs: 29.24 | +7: iteration 7860/ 21553 | consumed samples: 2012160 | consumed tokens: 4120903680 | elapsed time per iteration (s): 0.30 | learning rate: 1.488E-04 | global batch size: 256 | lm loss: 2.670992E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.879 | TFLOPs: 29.65 | +7: iteration 7870/ 21553 | consumed samples: 2014720 | consumed tokens: 4126146560 | elapsed time per iteration (s): 0.30 | learning rate: 1.486E-04 | global batch size: 256 | lm loss: 2.696637E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.054 | TFLOPs: 29.72 | +7: iteration 7880/ 21553 | consumed samples: 2017280 | consumed tokens: 4131389440 | elapsed time per iteration (s): 0.30 | learning rate: 1.485E-04 | global batch size: 256 | lm loss: 2.674227E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.013 | TFLOPs: 29.55 | +7: iteration 7890/ 21553 | consumed samples: 2019840 | consumed tokens: 4136632320 | elapsed time per iteration (s): 0.30 | learning rate: 1.484E-04 | global batch size: 256 | lm loss: 2.677986E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.948 | TFLOPs: 29.96 | +7: iteration 7900/ 21553 | consumed samples: 2022400 | consumed tokens: 4141875200 | elapsed time per iteration (s): 0.30 | learning rate: 1.483E-04 | global batch size: 256 | lm loss: 2.654003E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.832 | TFLOPs: 29.61 | +7: iteration 7910/ 21553 | consumed samples: 2024960 | consumed tokens: 4147118080 | elapsed time per iteration (s): 0.30 | learning rate: 1.482E-04 | global batch size: 256 | lm loss: 2.703624E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.786 | TFLOPs: 29.54 | +7: iteration 7920/ 21553 | consumed samples: 2027520 | consumed tokens: 4152360960 | elapsed time per iteration (s): 0.30 | learning rate: 1.480E-04 | global batch size: 256 | lm loss: 2.663383E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.271 | TFLOPs: 29.77 | +7: iteration 7930/ 21553 | consumed samples: 2030080 | consumed tokens: 4157603840 | elapsed time per iteration (s): 0.30 | learning rate: 1.479E-04 | global batch size: 256 | lm loss: 2.710731E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.837 | TFLOPs: 29.82 | +7: iteration 7940/ 21553 | consumed samples: 2032640 | consumed tokens: 4162846720 | elapsed time per iteration (s): 0.30 | learning rate: 1.478E-04 | global batch size: 256 | lm loss: 2.647705E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.148 | TFLOPs: 29.55 | +7: iteration 7950/ 21553 | consumed samples: 2035200 | consumed tokens: 4168089600 | elapsed time per iteration (s): 0.30 | learning rate: 1.477E-04 | global batch size: 256 | lm loss: 2.652871E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.157 | TFLOPs: 29.59 | +7: iteration 7960/ 21553 | consumed samples: 2037760 | consumed tokens: 4173332480 | elapsed time per iteration (s): 0.30 | learning rate: 1.476E-04 | global batch size: 256 | lm loss: 2.727497E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.480 | TFLOPs: 29.63 | +7: iteration 7970/ 21553 | consumed samples: 2040320 | consumed tokens: 4178575360 | elapsed time per iteration (s): 0.30 | learning rate: 1.474E-04 | global batch size: 256 | lm loss: 2.688192E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.653 | TFLOPs: 29.46 | +7: iteration 7980/ 21553 | consumed samples: 2042880 | consumed tokens: 4183818240 | elapsed time per iteration (s): 0.30 | learning rate: 1.473E-04 | global batch size: 256 | lm loss: 2.679510E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.025 | TFLOPs: 29.76 | +7: iteration 7990/ 21553 | consumed samples: 2045440 | consumed tokens: 4189061120 | elapsed time per iteration (s): 0.31 | learning rate: 1.472E-04 | global batch size: 256 | lm loss: 2.717732E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.866 | TFLOPs: 28.88 | +0: [2022-11-25 18:13:38,120] [INFO] [logging.py:68:log_dist] [Rank 0] step=8000, skipped=0, lr=[0.00014708123251590496, 0.00014708123251590496, 0.00014708123251590496], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 8000/ 21553 | consumed samples: 2048000 | consumed tokens: 4194304000 | elapsed time per iteration (s): 0.30 | learning rate: 1.471E-04 | global batch size: 256 | lm loss: 2.679197E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.653 | TFLOPs: 29.39 | +0: steps: 8000 loss: 2.6739 iter time (s): 0.303 samples/sec: 844.547 +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 8000 | lm loss value: 2.636105E+00 | lm loss PPL: 1.395873E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 8000 to checkpoints_146m +0: [2022-11-25 18:13:38,240] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step8000 is begin to save! +0: [2022-11-25 18:13:38,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:13:38,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:13:38,328] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:13:38,344] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:13:38,344] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:13:38,359] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:13:38,359] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:13:38,374] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:13:38,374] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:13:38,389] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:13:38,390] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:13:38,404] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:13:38,405] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:13:38,420] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:13:38,420] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:13:38,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:13:38,435] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:13:38,450] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:13:38,451] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:13:38,466] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:13:38,466] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:13:38,481] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:13:38,481] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:13:38,496] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:13:38,496] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:13:38,511] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:13:38,511] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:13:38,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:13:38,526] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:13:38,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:13:38,541] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:13:38,556] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:13:38,556] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:13:38,557] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:13:38,558] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step8000/mp_rank_00_model_states.pt +0: [2022-11-25 18:13:38,558] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:13:38,560] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:13:38,578] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:13:38,611] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,611] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,611] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,611] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,612] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,612] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,612] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,612] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,613] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,613] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,614] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,614] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,614] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,614] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,617] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,617] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,618] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:13:38,618] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,618] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:13:38,620] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:13:38,620] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2022-11-25 18:13:38,621] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:13:38,621] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:13:38,621] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:13:38,623] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2022-11-25 18:13:38,619] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,619] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,619] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2022-11-25 18:13:38,620] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,621] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,621] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,622] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:13:38,622] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:13:38,622] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,615] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,615] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,617] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,628] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,617] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,628] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,623] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2022-11-25 18:13:38,623] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,618] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,618] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,623] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,618] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,625] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,625] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,625] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:13:38,628] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:13:38,628] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,616] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,616] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,619] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,619] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,628] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:13:38,629] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 18:13:38,629] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,626] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2022-11-25 18:13:38,626] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2022-11-25 18:13:38,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:13:38,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: successfully saved checkpoint at iteration 8000 to checkpoints_146m +7: time (ms) | save-checkpoint: 402.02 +7: iteration 8010/ 21553 | consumed samples: 2050560 | consumed tokens: 4199546880 | elapsed time per iteration (s): 0.35 | learning rate: 1.470E-04 | global batch size: 256 | lm loss: 2.707127E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 732.671 | TFLOPs: 25.65 | +7: iteration 8020/ 21553 | consumed samples: 2053120 | consumed tokens: 4204789760 | elapsed time per iteration (s): 0.30 | learning rate: 1.468E-04 | global batch size: 256 | lm loss: 2.671088E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.561 | TFLOPs: 29.64 | +7: iteration 8030/ 21553 | consumed samples: 2055680 | consumed tokens: 4210032640 | elapsed time per iteration (s): 0.31 | learning rate: 1.467E-04 | global batch size: 256 | lm loss: 2.661433E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.516 | TFLOPs: 29.25 | +7: iteration 8040/ 21553 | consumed samples: 2058240 | consumed tokens: 4215275520 | elapsed time per iteration (s): 0.30 | learning rate: 1.466E-04 | global batch size: 256 | lm loss: 2.669422E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.133 | TFLOPs: 29.45 | +7: iteration 8050/ 21553 | consumed samples: 2060800 | consumed tokens: 4220518400 | elapsed time per iteration (s): 0.30 | learning rate: 1.465E-04 | global batch size: 256 | lm loss: 2.659303E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.060 | TFLOPs: 29.65 | +7: iteration 8060/ 21553 | consumed samples: 2063360 | consumed tokens: 4225761280 | elapsed time per iteration (s): 0.32 | learning rate: 1.464E-04 | global batch size: 256 | lm loss: 2.708554E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.425 | TFLOPs: 28.34 | +7: iteration 8070/ 21553 | consumed samples: 2065920 | consumed tokens: 4231004160 | elapsed time per iteration (s): 0.32 | learning rate: 1.462E-04 | global batch size: 256 | lm loss: 2.663161E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 791.781 | TFLOPs: 27.72 | +7: iteration 8080/ 21553 | consumed samples: 2068480 | consumed tokens: 4236247040 | elapsed time per iteration (s): 0.30 | learning rate: 1.461E-04 | global batch size: 256 | lm loss: 2.662837E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.988 | TFLOPs: 29.62 | +7: iteration 8090/ 21553 | consumed samples: 2071040 | consumed tokens: 4241489920 | elapsed time per iteration (s): 0.30 | learning rate: 1.460E-04 | global batch size: 256 | lm loss: 2.656141E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.216 | TFLOPs: 30.08 | +7: iteration 8100/ 21553 | consumed samples: 2073600 | consumed tokens: 4246732800 | elapsed time per iteration (s): 0.31 | learning rate: 1.459E-04 | global batch size: 256 | lm loss: 2.678896E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.087 | TFLOPs: 29.30 | +7: iteration 8110/ 21553 | consumed samples: 2076160 | consumed tokens: 4251975680 | elapsed time per iteration (s): 0.30 | learning rate: 1.457E-04 | global batch size: 256 | lm loss: 2.660052E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.098 | TFLOPs: 30.14 | +7: iteration 8120/ 21553 | consumed samples: 2078720 | consumed tokens: 4257218560 | elapsed time per iteration (s): 0.31 | learning rate: 1.456E-04 | global batch size: 256 | lm loss: 2.666844E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.731 | TFLOPs: 28.87 | +7: iteration 8130/ 21553 | consumed samples: 2081280 | consumed tokens: 4262461440 | elapsed time per iteration (s): 0.31 | learning rate: 1.455E-04 | global batch size: 256 | lm loss: 2.663249E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.499 | TFLOPs: 29.28 | +7: iteration 8140/ 21553 | consumed samples: 2083840 | consumed tokens: 4267704320 | elapsed time per iteration (s): 0.30 | learning rate: 1.454E-04 | global batch size: 256 | lm loss: 2.680085E+00 | grad norm: 0.328 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.221 | TFLOPs: 29.48 | +7: iteration 8150/ 21553 | consumed samples: 2086400 | consumed tokens: 4272947200 | elapsed time per iteration (s): 0.30 | learning rate: 1.453E-04 | global batch size: 256 | lm loss: 2.660643E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.451 | TFLOPs: 29.60 | +7: iteration 8160/ 21553 | consumed samples: 2088960 | consumed tokens: 4278190080 | elapsed time per iteration (s): 0.30 | learning rate: 1.451E-04 | global batch size: 256 | lm loss: 2.685322E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.179 | TFLOPs: 29.80 | +7: iteration 8170/ 21553 | consumed samples: 2091520 | consumed tokens: 4283432960 | elapsed time per iteration (s): 0.31 | learning rate: 1.450E-04 | global batch size: 256 | lm loss: 2.666122E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.163 | TFLOPs: 29.20 | +7: iteration 8180/ 21553 | consumed samples: 2094080 | consumed tokens: 4288675840 | elapsed time per iteration (s): 0.30 | learning rate: 1.449E-04 | global batch size: 256 | lm loss: 2.656985E+00 | grad norm: 0.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.528 | TFLOPs: 29.53 | +7: iteration 8190/ 21553 | consumed samples: 2096640 | consumed tokens: 4293918720 | elapsed time per iteration (s): 0.30 | learning rate: 1.448E-04 | global batch size: 256 | lm loss: 2.694933E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.018 | TFLOPs: 29.62 | +7: iteration 8200/ 21553 | consumed samples: 2099200 | consumed tokens: 4299161600 | elapsed time per iteration (s): 0.30 | learning rate: 1.447E-04 | global batch size: 256 | lm loss: 2.730825E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.929 | TFLOPs: 29.89 | +7: iteration 8210/ 21553 | consumed samples: 2101760 | consumed tokens: 4304404480 | elapsed time per iteration (s): 0.30 | learning rate: 1.445E-04 | global batch size: 256 | lm loss: 2.685824E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.823 | TFLOPs: 29.47 | +7: iteration 8220/ 21553 | consumed samples: 2104320 | consumed tokens: 4309647360 | elapsed time per iteration (s): 0.30 | learning rate: 1.444E-04 | global batch size: 256 | lm loss: 2.673837E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.889 | TFLOPs: 29.79 | +7: iteration 8230/ 21553 | consumed samples: 2106880 | consumed tokens: 4314890240 | elapsed time per iteration (s): 0.31 | learning rate: 1.443E-04 | global batch size: 256 | lm loss: 2.641303E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.715 | TFLOPs: 29.15 | +7: iteration 8240/ 21553 | consumed samples: 2109440 | consumed tokens: 4320133120 | elapsed time per iteration (s): 0.30 | learning rate: 1.442E-04 | global batch size: 256 | lm loss: 2.670354E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.563 | TFLOPs: 29.88 | +7: iteration 8250/ 21553 | consumed samples: 2112000 | consumed tokens: 4325376000 | elapsed time per iteration (s): 0.30 | learning rate: 1.440E-04 | global batch size: 256 | lm loss: 2.702522E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.469 | TFLOPs: 29.56 | +7: iteration 8260/ 21553 | consumed samples: 2114560 | consumed tokens: 4330618880 | elapsed time per iteration (s): 0.30 | learning rate: 1.439E-04 | global batch size: 256 | lm loss: 2.680205E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.292 | TFLOPs: 29.70 | +7: iteration 8270/ 21553 | consumed samples: 2117120 | consumed tokens: 4335861760 | elapsed time per iteration (s): 0.31 | learning rate: 1.438E-04 | global batch size: 256 | lm loss: 2.672401E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.346 | TFLOPs: 29.28 | +7: iteration 8280/ 21553 | consumed samples: 2119680 | consumed tokens: 4341104640 | elapsed time per iteration (s): 0.31 | learning rate: 1.437E-04 | global batch size: 256 | lm loss: 2.665387E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.021 | TFLOPs: 29.16 | +7: iteration 8290/ 21553 | consumed samples: 2122240 | consumed tokens: 4346347520 | elapsed time per iteration (s): 0.30 | learning rate: 1.435E-04 | global batch size: 256 | lm loss: 2.716727E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.133 | TFLOPs: 29.62 | +7: iteration 8300/ 21553 | consumed samples: 2124800 | consumed tokens: 4351590400 | elapsed time per iteration (s): 0.30 | learning rate: 1.434E-04 | global batch size: 256 | lm loss: 2.688479E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.536 | TFLOPs: 29.42 | +7: iteration 8310/ 21553 | consumed samples: 2127360 | consumed tokens: 4356833280 | elapsed time per iteration (s): 0.30 | learning rate: 1.433E-04 | global batch size: 256 | lm loss: 2.664778E+00 | grad norm: 0.339 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.574 | TFLOPs: 29.71 | +7: iteration 8320/ 21553 | consumed samples: 2129920 | consumed tokens: 4362076160 | elapsed time per iteration (s): 0.31 | learning rate: 1.432E-04 | global batch size: 256 | lm loss: 2.708394E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.925 | TFLOPs: 29.37 | +7: iteration 8330/ 21553 | consumed samples: 2132480 | consumed tokens: 4367319040 | elapsed time per iteration (s): 0.31 | learning rate: 1.431E-04 | global batch size: 256 | lm loss: 2.701032E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.104 | TFLOPs: 29.27 | +7: iteration 8340/ 21553 | consumed samples: 2135040 | consumed tokens: 4372561920 | elapsed time per iteration (s): 0.30 | learning rate: 1.429E-04 | global batch size: 256 | lm loss: 2.703253E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.351 | TFLOPs: 29.66 | +7: iteration 8350/ 21553 | consumed samples: 2137600 | consumed tokens: 4377804800 | elapsed time per iteration (s): 0.30 | learning rate: 1.428E-04 | global batch size: 256 | lm loss: 2.677070E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.949 | TFLOPs: 29.75 | +7: iteration 8360/ 21553 | consumed samples: 2140160 | consumed tokens: 4383047680 | elapsed time per iteration (s): 0.31 | learning rate: 1.427E-04 | global batch size: 256 | lm loss: 2.684690E+00 | grad norm: 0.332 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.609 | TFLOPs: 29.18 | +7: iteration 8370/ 21553 | consumed samples: 2142720 | consumed tokens: 4388290560 | elapsed time per iteration (s): 0.31 | learning rate: 1.426E-04 | global batch size: 256 | lm loss: 2.637162E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.525 | TFLOPs: 29.00 | +7: iteration 8380/ 21553 | consumed samples: 2145280 | consumed tokens: 4393533440 | elapsed time per iteration (s): 0.30 | learning rate: 1.424E-04 | global batch size: 256 | lm loss: 2.670026E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.161 | TFLOPs: 29.45 | +7: iteration 8390/ 21553 | consumed samples: 2147840 | consumed tokens: 4398776320 | elapsed time per iteration (s): 0.31 | learning rate: 1.423E-04 | global batch size: 256 | lm loss: 2.662690E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.260 | TFLOPs: 29.07 | +7: iteration 8400/ 21553 | consumed samples: 2150400 | consumed tokens: 4404019200 | elapsed time per iteration (s): 0.30 | learning rate: 1.422E-04 | global batch size: 256 | lm loss: 2.643905E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.235 | TFLOPs: 29.90 | +7: iteration 8410/ 21553 | consumed samples: 2152960 | consumed tokens: 4409262080 | elapsed time per iteration (s): 0.30 | learning rate: 1.421E-04 | global batch size: 256 | lm loss: 2.624481E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.225 | TFLOPs: 29.87 | +7: iteration 8420/ 21553 | consumed samples: 2155520 | consumed tokens: 4414504960 | elapsed time per iteration (s): 0.31 | learning rate: 1.419E-04 | global batch size: 256 | lm loss: 2.651486E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.870 | TFLOPs: 28.84 | +7: iteration 8430/ 21553 | consumed samples: 2158080 | consumed tokens: 4419747840 | elapsed time per iteration (s): 0.31 | learning rate: 1.418E-04 | global batch size: 256 | lm loss: 2.682108E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.489 | TFLOPs: 29.14 | +7: iteration 8440/ 21553 | consumed samples: 2160640 | consumed tokens: 4424990720 | elapsed time per iteration (s): 0.30 | learning rate: 1.417E-04 | global batch size: 256 | lm loss: 2.660513E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.633 | TFLOPs: 30.13 | +7: iteration 8450/ 21553 | consumed samples: 2163200 | consumed tokens: 4430233600 | elapsed time per iteration (s): 0.30 | learning rate: 1.416E-04 | global batch size: 256 | lm loss: 2.671371E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.809 | TFLOPs: 29.85 | +7: iteration 8460/ 21553 | consumed samples: 2165760 | consumed tokens: 4435476480 | elapsed time per iteration (s): 0.30 | learning rate: 1.414E-04 | global batch size: 256 | lm loss: 2.695058E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.930 | TFLOPs: 29.65 | +7: iteration 8470/ 21553 | consumed samples: 2168320 | consumed tokens: 4440719360 | elapsed time per iteration (s): 0.30 | learning rate: 1.413E-04 | global batch size: 256 | lm loss: 2.654253E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.419 | TFLOPs: 29.53 | +7: iteration 8480/ 21553 | consumed samples: 2170880 | consumed tokens: 4445962240 | elapsed time per iteration (s): 0.31 | learning rate: 1.412E-04 | global batch size: 256 | lm loss: 2.651997E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.484 | TFLOPs: 28.83 | +7: iteration 8490/ 21553 | consumed samples: 2173440 | consumed tokens: 4451205120 | elapsed time per iteration (s): 0.31 | learning rate: 1.411E-04 | global batch size: 256 | lm loss: 2.682381E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.511 | TFLOPs: 28.72 | +7: iteration 8500/ 21553 | consumed samples: 2176000 | consumed tokens: 4456448000 | elapsed time per iteration (s): 0.31 | learning rate: 1.409E-04 | global batch size: 256 | lm loss: 2.685364E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.356 | TFLOPs: 29.24 | +7: iteration 8510/ 21553 | consumed samples: 2178560 | consumed tokens: 4461690880 | elapsed time per iteration (s): 0.30 | learning rate: 1.408E-04 | global batch size: 256 | lm loss: 2.618479E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.060 | TFLOPs: 29.51 | +7: iteration 8520/ 21553 | consumed samples: 2181120 | consumed tokens: 4466933760 | elapsed time per iteration (s): 0.30 | learning rate: 1.407E-04 | global batch size: 256 | lm loss: 2.641392E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.918 | TFLOPs: 29.68 | +7: iteration 8530/ 21553 | consumed samples: 2183680 | consumed tokens: 4472176640 | elapsed time per iteration (s): 0.31 | learning rate: 1.406E-04 | global batch size: 256 | lm loss: 2.646509E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.336 | TFLOPs: 29.38 | +7: iteration 8540/ 21553 | consumed samples: 2186240 | consumed tokens: 4477419520 | elapsed time per iteration (s): 0.30 | learning rate: 1.405E-04 | global batch size: 256 | lm loss: 2.658899E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.846 | TFLOPs: 29.68 | +7: iteration 8550/ 21553 | consumed samples: 2188800 | consumed tokens: 4482662400 | elapsed time per iteration (s): 0.31 | learning rate: 1.403E-04 | global batch size: 256 | lm loss: 2.655449E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.408 | TFLOPs: 29.28 | +7: iteration 8560/ 21553 | consumed samples: 2191360 | consumed tokens: 4487905280 | elapsed time per iteration (s): 0.34 | learning rate: 1.402E-04 | global batch size: 256 | lm loss: 2.664741E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 754.666 | TFLOPs: 26.42 | +7: iteration 8570/ 21553 | consumed samples: 2193920 | consumed tokens: 4493148160 | elapsed time per iteration (s): 0.31 | learning rate: 1.401E-04 | global batch size: 256 | lm loss: 2.661856E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.411 | TFLOPs: 29.28 | +7: iteration 8580/ 21553 | consumed samples: 2196480 | consumed tokens: 4498391040 | elapsed time per iteration (s): 0.30 | learning rate: 1.400E-04 | global batch size: 256 | lm loss: 2.664940E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.365 | TFLOPs: 29.38 | +7: iteration 8590/ 21553 | consumed samples: 2199040 | consumed tokens: 4503633920 | elapsed time per iteration (s): 0.31 | learning rate: 1.398E-04 | global batch size: 256 | lm loss: 2.694544E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.771 | TFLOPs: 29.33 | +7: iteration 8600/ 21553 | consumed samples: 2201600 | consumed tokens: 4508876800 | elapsed time per iteration (s): 0.30 | learning rate: 1.397E-04 | global batch size: 256 | lm loss: 2.681502E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.901 | TFLOPs: 29.58 | +7: iteration 8610/ 21553 | consumed samples: 2204160 | consumed tokens: 4514119680 | elapsed time per iteration (s): 0.30 | learning rate: 1.396E-04 | global batch size: 256 | lm loss: 2.653589E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.184 | TFLOPs: 29.69 | +7: iteration 8620/ 21553 | consumed samples: 2206720 | consumed tokens: 4519362560 | elapsed time per iteration (s): 0.30 | learning rate: 1.395E-04 | global batch size: 256 | lm loss: 2.693717E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.855 | TFLOPs: 29.86 | +7: iteration 8630/ 21553 | consumed samples: 2209280 | consumed tokens: 4524605440 | elapsed time per iteration (s): 0.30 | learning rate: 1.393E-04 | global batch size: 256 | lm loss: 2.645361E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.665 | TFLOPs: 29.81 | +7: iteration 8640/ 21553 | consumed samples: 2211840 | consumed tokens: 4529848320 | elapsed time per iteration (s): 0.31 | learning rate: 1.392E-04 | global batch size: 256 | lm loss: 2.640386E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.299 | TFLOPs: 29.00 | +7: iteration 8650/ 21553 | consumed samples: 2214400 | consumed tokens: 4535091200 | elapsed time per iteration (s): 0.31 | learning rate: 1.391E-04 | global batch size: 256 | lm loss: 2.630871E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.494 | TFLOPs: 29.25 | +7: iteration 8660/ 21553 | consumed samples: 2216960 | consumed tokens: 4540334080 | elapsed time per iteration (s): 0.30 | learning rate: 1.389E-04 | global batch size: 256 | lm loss: 2.665293E+00 | grad norm: 0.342 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.913 | TFLOPs: 29.86 | +7: iteration 8670/ 21553 | consumed samples: 2219520 | consumed tokens: 4545576960 | elapsed time per iteration (s): 0.30 | learning rate: 1.388E-04 | global batch size: 256 | lm loss: 2.656559E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.343 | TFLOPs: 29.66 | +7: iteration 8680/ 21553 | consumed samples: 2222080 | consumed tokens: 4550819840 | elapsed time per iteration (s): 0.30 | learning rate: 1.387E-04 | global batch size: 256 | lm loss: 2.654974E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.413 | TFLOPs: 29.63 | +7: iteration 8690/ 21553 | consumed samples: 2224640 | consumed tokens: 4556062720 | elapsed time per iteration (s): 0.30 | learning rate: 1.386E-04 | global batch size: 256 | lm loss: 2.641323E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.913 | TFLOPs: 29.40 | +7: iteration 8700/ 21553 | consumed samples: 2227200 | consumed tokens: 4561305600 | elapsed time per iteration (s): 0.30 | learning rate: 1.384E-04 | global batch size: 256 | lm loss: 2.650997E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.390 | TFLOPs: 29.52 | +7: iteration 8710/ 21553 | consumed samples: 2229760 | consumed tokens: 4566548480 | elapsed time per iteration (s): 0.31 | learning rate: 1.383E-04 | global batch size: 256 | lm loss: 2.635637E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.349 | TFLOPs: 29.31 | +7: iteration 8720/ 21553 | consumed samples: 2232320 | consumed tokens: 4571791360 | elapsed time per iteration (s): 0.30 | learning rate: 1.382E-04 | global batch size: 256 | lm loss: 2.634480E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.320 | TFLOPs: 29.45 | +7: iteration 8730/ 21553 | consumed samples: 2234880 | consumed tokens: 4577034240 | elapsed time per iteration (s): 0.31 | learning rate: 1.381E-04 | global batch size: 256 | lm loss: 2.681150E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.634 | TFLOPs: 28.69 | +7: iteration 8740/ 21553 | consumed samples: 2237440 | consumed tokens: 4582277120 | elapsed time per iteration (s): 0.31 | learning rate: 1.379E-04 | global batch size: 256 | lm loss: 2.622884E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.392 | TFLOPs: 28.89 | +7: iteration 8750/ 21553 | consumed samples: 2240000 | consumed tokens: 4587520000 | elapsed time per iteration (s): 0.30 | learning rate: 1.378E-04 | global batch size: 256 | lm loss: 2.668789E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.332 | TFLOPs: 29.42 | +7: iteration 8760/ 21553 | consumed samples: 2242560 | consumed tokens: 4592762880 | elapsed time per iteration (s): 0.32 | learning rate: 1.377E-04 | global batch size: 256 | lm loss: 2.659682E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.748 | TFLOPs: 28.42 | +7: iteration 8770/ 21553 | consumed samples: 2245120 | consumed tokens: 4598005760 | elapsed time per iteration (s): 0.30 | learning rate: 1.376E-04 | global batch size: 256 | lm loss: 2.627574E+00 | grad norm: 0.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.432 | TFLOPs: 29.81 | +7: iteration 8780/ 21553 | consumed samples: 2247680 | consumed tokens: 4603248640 | elapsed time per iteration (s): 0.31 | learning rate: 1.374E-04 | global batch size: 256 | lm loss: 2.646394E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.451 | TFLOPs: 28.90 | +7: iteration 8790/ 21553 | consumed samples: 2250240 | consumed tokens: 4608491520 | elapsed time per iteration (s): 0.30 | learning rate: 1.373E-04 | global batch size: 256 | lm loss: 2.636317E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.245 | TFLOPs: 29.59 | +7: iteration 8800/ 21553 | consumed samples: 2252800 | consumed tokens: 4613734400 | elapsed time per iteration (s): 0.30 | learning rate: 1.372E-04 | global batch size: 256 | lm loss: 2.660730E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.298 | TFLOPs: 29.63 | +7: iteration 8810/ 21553 | consumed samples: 2255360 | consumed tokens: 4618977280 | elapsed time per iteration (s): 0.30 | learning rate: 1.371E-04 | global batch size: 256 | lm loss: 2.638914E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.053 | TFLOPs: 29.55 | +7: iteration 8820/ 21553 | consumed samples: 2257920 | consumed tokens: 4624220160 | elapsed time per iteration (s): 0.35 | learning rate: 1.369E-04 | global batch size: 256 | lm loss: 2.659123E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 728.147 | TFLOPs: 25.49 | +7: iteration 8830/ 21553 | consumed samples: 2260480 | consumed tokens: 4629463040 | elapsed time per iteration (s): 0.30 | learning rate: 1.368E-04 | global batch size: 256 | lm loss: 2.658493E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.898 | TFLOPs: 30.00 | +7: iteration 8840/ 21553 | consumed samples: 2263040 | consumed tokens: 4634705920 | elapsed time per iteration (s): 0.30 | learning rate: 1.367E-04 | global batch size: 256 | lm loss: 2.666479E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.011 | TFLOPs: 30.14 | +7: iteration 8850/ 21553 | consumed samples: 2265600 | consumed tokens: 4639948800 | elapsed time per iteration (s): 0.30 | learning rate: 1.366E-04 | global batch size: 256 | lm loss: 2.646543E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.513 | TFLOPs: 29.98 | +7: iteration 8860/ 21553 | consumed samples: 2268160 | consumed tokens: 4645191680 | elapsed time per iteration (s): 0.30 | learning rate: 1.364E-04 | global batch size: 256 | lm loss: 2.658465E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.896 | TFLOPs: 30.00 | +7: iteration 8870/ 21553 | consumed samples: 2270720 | consumed tokens: 4650434560 | elapsed time per iteration (s): 0.30 | learning rate: 1.363E-04 | global batch size: 256 | lm loss: 2.633993E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.985 | TFLOPs: 29.55 | +7: iteration 8880/ 21553 | consumed samples: 2273280 | consumed tokens: 4655677440 | elapsed time per iteration (s): 0.30 | learning rate: 1.362E-04 | global batch size: 256 | lm loss: 2.651927E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.460 | TFLOPs: 29.63 | +7: iteration 8890/ 21553 | consumed samples: 2275840 | consumed tokens: 4660920320 | elapsed time per iteration (s): 0.30 | learning rate: 1.360E-04 | global batch size: 256 | lm loss: 2.624794E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.482 | TFLOPs: 29.67 | +7: iteration 8900/ 21553 | consumed samples: 2278400 | consumed tokens: 4666163200 | elapsed time per iteration (s): 0.30 | learning rate: 1.359E-04 | global batch size: 256 | lm loss: 2.658854E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.631 | TFLOPs: 29.92 | +7: iteration 8910/ 21553 | consumed samples: 2280960 | consumed tokens: 4671406080 | elapsed time per iteration (s): 0.30 | learning rate: 1.358E-04 | global batch size: 256 | lm loss: 2.651340E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.258 | TFLOPs: 29.70 | +7: iteration 8920/ 21553 | consumed samples: 2283520 | consumed tokens: 4676648960 | elapsed time per iteration (s): 0.31 | learning rate: 1.357E-04 | global batch size: 256 | lm loss: 2.627003E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.001 | TFLOPs: 28.78 | +7: iteration 8930/ 21553 | consumed samples: 2286080 | consumed tokens: 4681891840 | elapsed time per iteration (s): 0.30 | learning rate: 1.355E-04 | global batch size: 256 | lm loss: 2.637142E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.035 | TFLOPs: 29.55 | +7: iteration 8940/ 21553 | consumed samples: 2288640 | consumed tokens: 4687134720 | elapsed time per iteration (s): 0.31 | learning rate: 1.354E-04 | global batch size: 256 | lm loss: 2.638240E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.057 | TFLOPs: 29.06 | +7: iteration 8950/ 21553 | consumed samples: 2291200 | consumed tokens: 4692377600 | elapsed time per iteration (s): 0.32 | learning rate: 1.353E-04 | global batch size: 256 | lm loss: 2.648367E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 798.655 | TFLOPs: 27.96 | +7: iteration 8960/ 21553 | consumed samples: 2293760 | consumed tokens: 4697620480 | elapsed time per iteration (s): 0.30 | learning rate: 1.352E-04 | global batch size: 256 | lm loss: 2.640506E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.590 | TFLOPs: 29.39 | +7: iteration 8970/ 21553 | consumed samples: 2296320 | consumed tokens: 4702863360 | elapsed time per iteration (s): 0.32 | learning rate: 1.350E-04 | global batch size: 256 | lm loss: 2.647449E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.932 | TFLOPs: 28.07 | +7: iteration 8980/ 21553 | consumed samples: 2298880 | consumed tokens: 4708106240 | elapsed time per iteration (s): 0.31 | learning rate: 1.349E-04 | global batch size: 256 | lm loss: 2.626959E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.879 | TFLOPs: 29.09 | +7: iteration 8990/ 21553 | consumed samples: 2301440 | consumed tokens: 4713349120 | elapsed time per iteration (s): 0.30 | learning rate: 1.348E-04 | global batch size: 256 | lm loss: 2.631505E+00 | grad norm: 0.364 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.668 | TFLOPs: 29.43 | +7: iteration 9000/ 21553 | consumed samples: 2304000 | consumed tokens: 4718592000 | elapsed time per iteration (s): 0.31 | learning rate: 1.346E-04 | global batch size: 256 | lm loss: 2.639656E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.669 | TFLOPs: 28.90 | +7: ------------------------------------------------------------------------------------------ +7: valid loss at iteration 9000 | lm loss value: 2.500567E+00 | lm loss PPL: 1.218940E+01 | +7: ------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 9000 to checkpoints_146m +0: [2022-11-25 18:18:44,274] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step9000 is begin to save! +0: [2022-11-25 18:18:44,278] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:18:44,363] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:18:44,363] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:18:44,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:18:44,379] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:18:44,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:18:44,394] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:18:44,409] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:18:44,409] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:18:44,423] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:18:44,423] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:18:44,438] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:18:44,438] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:18:44,452] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:18:44,453] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:18:44,467] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:18:44,467] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:18:44,482] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:18:44,482] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:18:44,496] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:18:44,497] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:18:44,511] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:18:44,511] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:18:44,526] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:18:44,526] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:18:44,540] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:18:44,541] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:18:44,555] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:18:44,556] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:18:44,570] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:18:44,571] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:18:44,585] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:18:44,585] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:18:44,586] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:18:44,587] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step9000/mp_rank_00_model_states.pt +0: [2022-11-25 18:18:44,587] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:18:44,589] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:18:44,811] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:18:44,842] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,844] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,844] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,844] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,844] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,844] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,845] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,845] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,845] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,845] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,845] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,847] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,847] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,847] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,847] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,847] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,848] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,848] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,848] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,848] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,848] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,848] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,848] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,846] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,846] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,849] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,851] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,851] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,851] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,851] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,852] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,852] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,852] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,852] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,856] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,856] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,856] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,856] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:18:44,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:18:44,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:18:44,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:18:44,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,860] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2022-11-25 18:18:44,861] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:18:44,861] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:18:44,861] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2022-11-25 18:18:44,864] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:18:44,864] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:18:44,864] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: successfully saved checkpoint at iteration 9000 to checkpoints_146m +7: time (ms) | save-checkpoint: 603.24 +7: iteration 9010/ 21553 | consumed samples: 2306560 | consumed tokens: 4723834880 | elapsed time per iteration (s): 0.37 | learning rate: 1.345E-04 | global batch size: 256 | lm loss: 2.659378E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 691.101 | TFLOPs: 24.19 | +7: iteration 9020/ 21553 | consumed samples: 2309120 | consumed tokens: 4729077760 | elapsed time per iteration (s): 0.31 | learning rate: 1.344E-04 | global batch size: 256 | lm loss: 2.633344E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.072 | TFLOPs: 29.09 | +7: iteration 9030/ 21553 | consumed samples: 2311680 | consumed tokens: 4734320640 | elapsed time per iteration (s): 0.31 | learning rate: 1.343E-04 | global batch size: 256 | lm loss: 2.667186E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.442 | TFLOPs: 29.18 | +7: iteration 9040/ 21553 | consumed samples: 2314240 | consumed tokens: 4739563520 | elapsed time per iteration (s): 0.32 | learning rate: 1.341E-04 | global batch size: 256 | lm loss: 2.655653E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.917 | TFLOPs: 27.93 | +7: iteration 9050/ 21553 | consumed samples: 2316800 | consumed tokens: 4744806400 | elapsed time per iteration (s): 0.31 | learning rate: 1.340E-04 | global batch size: 256 | lm loss: 2.673836E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.940 | TFLOPs: 28.81 | +7: iteration 9060/ 21553 | consumed samples: 2319360 | consumed tokens: 4750049280 | elapsed time per iteration (s): 0.30 | learning rate: 1.339E-04 | global batch size: 256 | lm loss: 2.665565E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.116 | TFLOPs: 29.69 | +7: iteration 9070/ 21553 | consumed samples: 2321920 | consumed tokens: 4755292160 | elapsed time per iteration (s): 0.31 | learning rate: 1.338E-04 | global batch size: 256 | lm loss: 2.649175E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.217 | TFLOPs: 28.92 | +7: iteration 9080/ 21553 | consumed samples: 2324480 | consumed tokens: 4760535040 | elapsed time per iteration (s): 0.30 | learning rate: 1.336E-04 | global batch size: 256 | lm loss: 2.678802E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.249 | TFLOPs: 30.01 | +7: iteration 9090/ 21553 | consumed samples: 2327040 | consumed tokens: 4765777920 | elapsed time per iteration (s): 0.30 | learning rate: 1.335E-04 | global batch size: 256 | lm loss: 2.632897E+00 | grad norm: 0.341 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.767 | TFLOPs: 29.40 | +7: iteration 9100/ 21553 | consumed samples: 2329600 | consumed tokens: 4771020800 | elapsed time per iteration (s): 0.31 | learning rate: 1.334E-04 | global batch size: 256 | lm loss: 2.665184E+00 | grad norm: 0.344 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.701 | TFLOPs: 29.29 | +7: iteration 9110/ 21553 | consumed samples: 2332160 | consumed tokens: 4776263680 | elapsed time per iteration (s): 0.30 | learning rate: 1.332E-04 | global batch size: 256 | lm loss: 2.627391E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.103 | TFLOPs: 29.65 | +7: iteration 9120/ 21553 | consumed samples: 2334720 | consumed tokens: 4781506560 | elapsed time per iteration (s): 0.31 | learning rate: 1.331E-04 | global batch size: 256 | lm loss: 2.658834E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.081 | TFLOPs: 29.37 | +7: iteration 9130/ 21553 | consumed samples: 2337280 | consumed tokens: 4786749440 | elapsed time per iteration (s): 0.30 | learning rate: 1.330E-04 | global batch size: 256 | lm loss: 2.651074E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.448 | TFLOPs: 29.60 | +7: iteration 9140/ 21553 | consumed samples: 2339840 | consumed tokens: 4791992320 | elapsed time per iteration (s): 0.30 | learning rate: 1.329E-04 | global batch size: 256 | lm loss: 2.635694E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.397 | TFLOPs: 29.52 | +7: iteration 9150/ 21553 | consumed samples: 2342400 | consumed tokens: 4797235200 | elapsed time per iteration (s): 0.30 | learning rate: 1.327E-04 | global batch size: 256 | lm loss: 2.654020E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.385 | TFLOPs: 29.77 | +7: iteration 9160/ 21553 | consumed samples: 2344960 | consumed tokens: 4802478080 | elapsed time per iteration (s): 0.30 | learning rate: 1.326E-04 | global batch size: 256 | lm loss: 2.660613E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.046 | TFLOPs: 29.76 | +7: iteration 9170/ 21553 | consumed samples: 2347520 | consumed tokens: 4807720960 | elapsed time per iteration (s): 0.30 | learning rate: 1.325E-04 | global batch size: 256 | lm loss: 2.701307E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.687 | TFLOPs: 29.78 | +7: iteration 9180/ 21553 | consumed samples: 2350080 | consumed tokens: 4812963840 | elapsed time per iteration (s): 0.30 | learning rate: 1.323E-04 | global batch size: 256 | lm loss: 2.653548E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.118 | TFLOPs: 29.52 | +7: iteration 9190/ 21553 | consumed samples: 2352640 | consumed tokens: 4818206720 | elapsed time per iteration (s): 0.31 | learning rate: 1.322E-04 | global batch size: 256 | lm loss: 2.657800E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.231 | TFLOPs: 29.06 | +7: iteration 9200/ 21553 | consumed samples: 2355200 | consumed tokens: 4823449600 | elapsed time per iteration (s): 0.30 | learning rate: 1.321E-04 | global batch size: 256 | lm loss: 2.639651E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.642 | TFLOPs: 29.39 | +7: iteration 9210/ 21553 | consumed samples: 2357760 | consumed tokens: 4828692480 | elapsed time per iteration (s): 0.30 | learning rate: 1.320E-04 | global batch size: 256 | lm loss: 2.636707E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.210 | TFLOPs: 29.59 | +7: iteration 9220/ 21553 | consumed samples: 2360320 | consumed tokens: 4833935360 | elapsed time per iteration (s): 0.31 | learning rate: 1.318E-04 | global batch size: 256 | lm loss: 2.594206E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.003 | TFLOPs: 29.20 | +7: iteration 9230/ 21553 | consumed samples: 2362880 | consumed tokens: 4839178240 | elapsed time per iteration (s): 0.30 | learning rate: 1.317E-04 | global batch size: 256 | lm loss: 2.659342E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.305 | TFLOPs: 29.63 | +7: iteration 9240/ 21553 | consumed samples: 2365440 | consumed tokens: 4844421120 | elapsed time per iteration (s): 0.30 | learning rate: 1.316E-04 | global batch size: 256 | lm loss: 2.641341E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.967 | TFLOPs: 29.47 | +7: iteration 9250/ 21553 | consumed samples: 2368000 | consumed tokens: 4849664000 | elapsed time per iteration (s): 0.30 | learning rate: 1.314E-04 | global batch size: 256 | lm loss: 2.639765E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.890 | TFLOPs: 30.07 | +7: iteration 9260/ 21553 | consumed samples: 2370560 | consumed tokens: 4854906880 | elapsed time per iteration (s): 0.31 | learning rate: 1.313E-04 | global batch size: 256 | lm loss: 2.673612E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.293 | TFLOPs: 28.86 | +7: iteration 9270/ 21553 | consumed samples: 2373120 | consumed tokens: 4860149760 | elapsed time per iteration (s): 0.30 | learning rate: 1.312E-04 | global batch size: 256 | lm loss: 2.659805E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.934 | TFLOPs: 29.54 | +7: iteration 9280/ 21553 | consumed samples: 2375680 | consumed tokens: 4865392640 | elapsed time per iteration (s): 0.30 | learning rate: 1.311E-04 | global batch size: 256 | lm loss: 2.660976E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.879 | TFLOPs: 29.72 | +7: iteration 9290/ 21553 | consumed samples: 2378240 | consumed tokens: 4870635520 | elapsed time per iteration (s): 0.30 | learning rate: 1.309E-04 | global batch size: 256 | lm loss: 2.644291E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.978 | TFLOPs: 29.69 | +7: iteration 9300/ 21553 | consumed samples: 2380800 | consumed tokens: 4875878400 | elapsed time per iteration (s): 0.30 | learning rate: 1.308E-04 | global batch size: 256 | lm loss: 2.624231E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.671 | TFLOPs: 29.67 | +7: iteration 9310/ 21553 | consumed samples: 2383360 | consumed tokens: 4881121280 | elapsed time per iteration (s): 0.31 | learning rate: 1.307E-04 | global batch size: 256 | lm loss: 2.647070E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.414 | TFLOPs: 29.11 | +7: iteration 9320/ 21553 | consumed samples: 2385920 | consumed tokens: 4886364160 | elapsed time per iteration (s): 0.31 | learning rate: 1.305E-04 | global batch size: 256 | lm loss: 2.643495E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.868 | TFLOPs: 28.91 | +7: iteration 9330/ 21553 | consumed samples: 2388480 | consumed tokens: 4891607040 | elapsed time per iteration (s): 0.31 | learning rate: 1.304E-04 | global batch size: 256 | lm loss: 2.672696E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.203 | TFLOPs: 29.13 | +7: iteration 9340/ 21553 | consumed samples: 2391040 | consumed tokens: 4896849920 | elapsed time per iteration (s): 0.31 | learning rate: 1.303E-04 | global batch size: 256 | lm loss: 2.653495E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.168 | TFLOPs: 29.34 | +7: iteration 9350/ 21553 | consumed samples: 2393600 | consumed tokens: 4902092800 | elapsed time per iteration (s): 0.30 | learning rate: 1.302E-04 | global batch size: 256 | lm loss: 2.655507E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.212 | TFLOPs: 29.73 | +7: iteration 9360/ 21553 | consumed samples: 2396160 | consumed tokens: 4907335680 | elapsed time per iteration (s): 0.31 | learning rate: 1.300E-04 | global batch size: 256 | lm loss: 2.608997E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.315 | TFLOPs: 29.35 | +7: iteration 9370/ 21553 | consumed samples: 2398720 | consumed tokens: 4912578560 | elapsed time per iteration (s): 0.30 | learning rate: 1.299E-04 | global batch size: 256 | lm loss: 2.638690E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.342 | TFLOPs: 29.42 | +7: iteration 9380/ 21553 | consumed samples: 2401280 | consumed tokens: 4917821440 | elapsed time per iteration (s): 0.30 | learning rate: 1.298E-04 | global batch size: 256 | lm loss: 2.653322E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.085 | TFLOPs: 29.65 | +7: iteration 9390/ 21553 | consumed samples: 2403840 | consumed tokens: 4923064320 | elapsed time per iteration (s): 0.30 | learning rate: 1.296E-04 | global batch size: 256 | lm loss: 2.622622E+00 | grad norm: 0.338 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.639 | TFLOPs: 29.53 | +7: iteration 9400/ 21553 | consumed samples: 2406400 | consumed tokens: 4928307200 | elapsed time per iteration (s): 0.30 | learning rate: 1.295E-04 | global batch size: 256 | lm loss: 2.653407E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.956 | TFLOPs: 29.89 | +7: iteration 9410/ 21553 | consumed samples: 2408960 | consumed tokens: 4933550080 | elapsed time per iteration (s): 0.31 | learning rate: 1.294E-04 | global batch size: 256 | lm loss: 2.598364E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.815 | TFLOPs: 28.80 | +7: iteration 9420/ 21553 | consumed samples: 2411520 | consumed tokens: 4938792960 | elapsed time per iteration (s): 0.31 | learning rate: 1.293E-04 | global batch size: 256 | lm loss: 2.661740E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.789 | TFLOPs: 29.12 | +7: iteration 9430/ 21553 | consumed samples: 2414080 | consumed tokens: 4944035840 | elapsed time per iteration (s): 0.31 | learning rate: 1.291E-04 | global batch size: 256 | lm loss: 2.627238E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.028 | TFLOPs: 29.16 | +7: iteration 9440/ 21553 | consumed samples: 2416640 | consumed tokens: 4949278720 | elapsed time per iteration (s): 0.30 | learning rate: 1.290E-04 | global batch size: 256 | lm loss: 2.651920E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.884 | TFLOPs: 29.44 | +7: iteration 9450/ 21553 | consumed samples: 2419200 | consumed tokens: 4954521600 | elapsed time per iteration (s): 0.30 | learning rate: 1.289E-04 | global batch size: 256 | lm loss: 2.689575E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.970 | TFLOPs: 30.04 | +7: iteration 9460/ 21553 | consumed samples: 2421760 | consumed tokens: 4959764480 | elapsed time per iteration (s): 0.32 | learning rate: 1.287E-04 | global batch size: 256 | lm loss: 2.661116E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.155 | TFLOPs: 28.33 | +7: iteration 9470/ 21553 | consumed samples: 2424320 | consumed tokens: 4965007360 | elapsed time per iteration (s): 0.30 | learning rate: 1.286E-04 | global batch size: 256 | lm loss: 2.663418E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.826 | TFLOPs: 29.79 | +7: iteration 9480/ 21553 | consumed samples: 2426880 | consumed tokens: 4970250240 | elapsed time per iteration (s): 0.32 | learning rate: 1.285E-04 | global batch size: 256 | lm loss: 2.609962E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 809.354 | TFLOPs: 28.33 | +7: iteration 9490/ 21553 | consumed samples: 2429440 | consumed tokens: 4975493120 | elapsed time per iteration (s): 0.30 | learning rate: 1.283E-04 | global batch size: 256 | lm loss: 2.612126E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.483 | TFLOPs: 29.74 | +7: iteration 9500/ 21553 | consumed samples: 2432000 | consumed tokens: 4980736000 | elapsed time per iteration (s): 0.30 | learning rate: 1.282E-04 | global batch size: 256 | lm loss: 2.646208E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.535 | TFLOPs: 29.56 | +7: iteration 9510/ 21553 | consumed samples: 2434560 | consumed tokens: 4985978880 | elapsed time per iteration (s): 0.31 | learning rate: 1.281E-04 | global batch size: 256 | lm loss: 2.643769E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.063 | TFLOPs: 29.37 | +7: iteration 9520/ 21553 | consumed samples: 2437120 | consumed tokens: 4991221760 | elapsed time per iteration (s): 0.30 | learning rate: 1.280E-04 | global batch size: 256 | lm loss: 2.622796E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.184 | TFLOPs: 29.41 | +7: iteration 9530/ 21553 | consumed samples: 2439680 | consumed tokens: 4996464640 | elapsed time per iteration (s): 0.31 | learning rate: 1.278E-04 | global batch size: 256 | lm loss: 2.663624E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.943 | TFLOPs: 28.98 | +7: iteration 9540/ 21553 | consumed samples: 2442240 | consumed tokens: 5001707520 | elapsed time per iteration (s): 0.30 | learning rate: 1.277E-04 | global batch size: 256 | lm loss: 2.662347E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.317 | TFLOPs: 29.52 | +7: iteration 9550/ 21553 | consumed samples: 2444800 | consumed tokens: 5006950400 | elapsed time per iteration (s): 0.32 | learning rate: 1.276E-04 | global batch size: 256 | lm loss: 2.654179E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.927 | TFLOPs: 28.18 | +7: iteration 9560/ 21553 | consumed samples: 2447360 | consumed tokens: 5012193280 | elapsed time per iteration (s): 0.30 | learning rate: 1.274E-04 | global batch size: 256 | lm loss: 2.649746E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.841 | TFLOPs: 29.58 | +7: iteration 9570/ 21553 | consumed samples: 2449920 | consumed tokens: 5017436160 | elapsed time per iteration (s): 0.31 | learning rate: 1.273E-04 | global batch size: 256 | lm loss: 2.579852E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.531 | TFLOPs: 29.11 | +7: iteration 9580/ 21553 | consumed samples: 2452480 | consumed tokens: 5022679040 | elapsed time per iteration (s): 0.30 | learning rate: 1.272E-04 | global batch size: 256 | lm loss: 2.644219E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.405 | TFLOPs: 29.67 | +7: iteration 9590/ 21553 | consumed samples: 2455040 | consumed tokens: 5027921920 | elapsed time per iteration (s): 0.30 | learning rate: 1.270E-04 | global batch size: 256 | lm loss: 2.612819E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.600 | TFLOPs: 29.64 | +7: iteration 9600/ 21553 | consumed samples: 2457600 | consumed tokens: 5033164800 | elapsed time per iteration (s): 0.31 | learning rate: 1.269E-04 | global batch size: 256 | lm loss: 2.620036E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.809 | TFLOPs: 29.15 | +7: iteration 9610/ 21553 | consumed samples: 2460160 | consumed tokens: 5038407680 | elapsed time per iteration (s): 0.30 | learning rate: 1.268E-04 | global batch size: 256 | lm loss: 2.617701E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.785 | TFLOPs: 29.92 | +7: iteration 9620/ 21553 | consumed samples: 2462720 | consumed tokens: 5043650560 | elapsed time per iteration (s): 0.30 | learning rate: 1.267E-04 | global batch size: 256 | lm loss: 2.683018E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.742 | TFLOPs: 29.71 | +7: iteration 9630/ 21553 | consumed samples: 2465280 | consumed tokens: 5048893440 | elapsed time per iteration (s): 0.30 | learning rate: 1.265E-04 | global batch size: 256 | lm loss: 2.652936E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.013 | TFLOPs: 29.76 | +7: iteration 9640/ 21553 | consumed samples: 2467840 | consumed tokens: 5054136320 | elapsed time per iteration (s): 0.31 | learning rate: 1.264E-04 | global batch size: 256 | lm loss: 2.607958E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.758 | TFLOPs: 29.33 | +7: iteration 9650/ 21553 | consumed samples: 2470400 | consumed tokens: 5059379200 | elapsed time per iteration (s): 0.30 | learning rate: 1.263E-04 | global batch size: 256 | lm loss: 2.647495E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.375 | TFLOPs: 29.91 | +7: iteration 9660/ 21553 | consumed samples: 2472960 | consumed tokens: 5064622080 | elapsed time per iteration (s): 0.30 | learning rate: 1.261E-04 | global batch size: 256 | lm loss: 2.649447E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.754 | TFLOPs: 29.57 | +7: iteration 9670/ 21553 | consumed samples: 2475520 | consumed tokens: 5069864960 | elapsed time per iteration (s): 0.31 | learning rate: 1.260E-04 | global batch size: 256 | lm loss: 2.618578E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.638 | TFLOPs: 29.32 | +7: iteration 9680/ 21553 | consumed samples: 2478080 | consumed tokens: 5075107840 | elapsed time per iteration (s): 0.31 | learning rate: 1.259E-04 | global batch size: 256 | lm loss: 2.622193E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.079 | TFLOPs: 29.34 | +7: iteration 9690/ 21553 | consumed samples: 2480640 | consumed tokens: 5080350720 | elapsed time per iteration (s): 0.30 | learning rate: 1.257E-04 | global batch size: 256 | lm loss: 2.622176E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.634 | TFLOPs: 30.13 | +7: iteration 9700/ 21553 | consumed samples: 2483200 | consumed tokens: 5085593600 | elapsed time per iteration (s): 0.30 | learning rate: 1.256E-04 | global batch size: 256 | lm loss: 2.633418E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.795 | TFLOPs: 30.13 | +7: iteration 9710/ 21553 | consumed samples: 2485760 | consumed tokens: 5090836480 | elapsed time per iteration (s): 0.30 | learning rate: 1.255E-04 | global batch size: 256 | lm loss: 2.626439E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.542 | TFLOPs: 29.64 | +7: iteration 9720/ 21553 | consumed samples: 2488320 | consumed tokens: 5096079360 | elapsed time per iteration (s): 0.30 | learning rate: 1.254E-04 | global batch size: 256 | lm loss: 2.630178E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.921 | TFLOPs: 29.54 | +7: iteration 9730/ 21553 | consumed samples: 2490880 | consumed tokens: 5101322240 | elapsed time per iteration (s): 0.30 | learning rate: 1.252E-04 | global batch size: 256 | lm loss: 2.681543E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.814 | TFLOPs: 29.78 | +7: iteration 9740/ 21553 | consumed samples: 2493440 | consumed tokens: 5106565120 | elapsed time per iteration (s): 0.30 | learning rate: 1.251E-04 | global batch size: 256 | lm loss: 2.648462E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.663 | TFLOPs: 29.57 | +7: iteration 9750/ 21553 | consumed samples: 2496000 | consumed tokens: 5111808000 | elapsed time per iteration (s): 0.30 | learning rate: 1.250E-04 | global batch size: 256 | lm loss: 2.624701E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.110 | TFLOPs: 30.11 | +7: iteration 9760/ 21553 | consumed samples: 2498560 | consumed tokens: 5117050880 | elapsed time per iteration (s): 0.30 | learning rate: 1.248E-04 | global batch size: 256 | lm loss: 2.619928E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.684 | TFLOPs: 29.99 | +7: iteration 9770/ 21553 | consumed samples: 2501120 | consumed tokens: 5122293760 | elapsed time per iteration (s): 0.30 | learning rate: 1.247E-04 | global batch size: 256 | lm loss: 2.635024E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.092 | TFLOPs: 29.69 | +7: iteration 9780/ 21553 | consumed samples: 2503680 | consumed tokens: 5127536640 | elapsed time per iteration (s): 0.30 | learning rate: 1.246E-04 | global batch size: 256 | lm loss: 2.661715E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.119 | TFLOPs: 29.55 | +7: iteration 9790/ 21553 | consumed samples: 2506240 | consumed tokens: 5132779520 | elapsed time per iteration (s): 0.30 | learning rate: 1.244E-04 | global batch size: 256 | lm loss: 2.618040E+00 | grad norm: 0.337 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.518 | TFLOPs: 29.84 | +7: iteration 9800/ 21553 | consumed samples: 2508800 | consumed tokens: 5138022400 | elapsed time per iteration (s): 0.30 | learning rate: 1.243E-04 | global batch size: 256 | lm loss: 2.646500E+00 | grad norm: 0.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.025 | TFLOPs: 29.83 | +7: iteration 9810/ 21553 | consumed samples: 2511360 | consumed tokens: 5143265280 | elapsed time per iteration (s): 0.30 | learning rate: 1.242E-04 | global batch size: 256 | lm loss: 2.641888E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.699 | TFLOPs: 29.99 | +7: iteration 9820/ 21553 | consumed samples: 2513920 | consumed tokens: 5148508160 | elapsed time per iteration (s): 0.30 | learning rate: 1.240E-04 | global batch size: 256 | lm loss: 2.613607E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.714 | TFLOPs: 29.82 | +7: iteration 9830/ 21553 | consumed samples: 2516480 | consumed tokens: 5153751040 | elapsed time per iteration (s): 0.30 | learning rate: 1.239E-04 | global batch size: 256 | lm loss: 2.642268E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.459 | TFLOPs: 29.49 | +7: iteration 9840/ 21553 | consumed samples: 2519040 | consumed tokens: 5158993920 | elapsed time per iteration (s): 0.30 | learning rate: 1.238E-04 | global batch size: 256 | lm loss: 2.595703E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.921 | TFLOPs: 29.58 | +7: iteration 9850/ 21553 | consumed samples: 2521600 | consumed tokens: 5164236800 | elapsed time per iteration (s): 0.30 | learning rate: 1.237E-04 | global batch size: 256 | lm loss: 2.640386E+00 | grad norm: 0.315 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.900 | TFLOPs: 29.75 | +7: iteration 9860/ 21553 | consumed samples: 2524160 | consumed tokens: 5169479680 | elapsed time per iteration (s): 0.31 | learning rate: 1.235E-04 | global batch size: 256 | lm loss: 2.657126E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.379 | TFLOPs: 29.31 | +7: iteration 9870/ 21553 | consumed samples: 2526720 | consumed tokens: 5174722560 | elapsed time per iteration (s): 0.30 | learning rate: 1.234E-04 | global batch size: 256 | lm loss: 2.597054E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.041 | TFLOPs: 30.07 | +7: iteration 9880/ 21553 | consumed samples: 2529280 | consumed tokens: 5179965440 | elapsed time per iteration (s): 0.31 | learning rate: 1.233E-04 | global batch size: 256 | lm loss: 2.628379E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.901 | TFLOPs: 29.33 | +7: iteration 9890/ 21553 | consumed samples: 2531840 | consumed tokens: 5185208320 | elapsed time per iteration (s): 0.31 | learning rate: 1.231E-04 | global batch size: 256 | lm loss: 2.662988E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.067 | TFLOPs: 29.34 | +7: iteration 9900/ 21553 | consumed samples: 2534400 | consumed tokens: 5190451200 | elapsed time per iteration (s): 0.31 | learning rate: 1.230E-04 | global batch size: 256 | lm loss: 2.660839E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.964 | TFLOPs: 28.98 | +7: iteration 9910/ 21553 | consumed samples: 2536960 | consumed tokens: 5195694080 | elapsed time per iteration (s): 0.30 | learning rate: 1.229E-04 | global batch size: 256 | lm loss: 2.642667E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.321 | TFLOPs: 29.49 | +7: iteration 9920/ 21553 | consumed samples: 2539520 | consumed tokens: 5200936960 | elapsed time per iteration (s): 0.31 | learning rate: 1.227E-04 | global batch size: 256 | lm loss: 2.668446E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.382 | TFLOPs: 29.28 | +7: iteration 9930/ 21553 | consumed samples: 2542080 | consumed tokens: 5206179840 | elapsed time per iteration (s): 0.31 | learning rate: 1.226E-04 | global batch size: 256 | lm loss: 2.639725E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.084 | TFLOPs: 28.92 | +7: iteration 9940/ 21553 | consumed samples: 2544640 | consumed tokens: 5211422720 | elapsed time per iteration (s): 0.30 | learning rate: 1.225E-04 | global batch size: 256 | lm loss: 2.613216E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.084 | TFLOPs: 29.41 | +7: iteration 9950/ 21553 | consumed samples: 2547200 | consumed tokens: 5216665600 | elapsed time per iteration (s): 0.31 | learning rate: 1.223E-04 | global batch size: 256 | lm loss: 2.609112E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.679 | TFLOPs: 29.15 | +7: iteration 9960/ 21553 | consumed samples: 2549760 | consumed tokens: 5221908480 | elapsed time per iteration (s): 0.31 | learning rate: 1.222E-04 | global batch size: 256 | lm loss: 2.643748E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.828 | TFLOPs: 29.19 | +7: iteration 9970/ 21553 | consumed samples: 2552320 | consumed tokens: 5227151360 | elapsed time per iteration (s): 0.30 | learning rate: 1.221E-04 | global batch size: 256 | lm loss: 2.600793E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.635 | TFLOPs: 29.53 | +7: iteration 9980/ 21553 | consumed samples: 2554880 | consumed tokens: 5232394240 | elapsed time per iteration (s): 0.30 | learning rate: 1.219E-04 | global batch size: 256 | lm loss: 2.652720E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.871 | TFLOPs: 29.44 | +7: iteration 9990/ 21553 | consumed samples: 2557440 | consumed tokens: 5237637120 | elapsed time per iteration (s): 0.30 | learning rate: 1.218E-04 | global batch size: 256 | lm loss: 2.651909E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.988 | TFLOPs: 29.51 | +0: [2022-11-25 18:23:49,182] [INFO] [logging.py:68:log_dist] [Rank 0] step=10000, skipped=0, lr=[0.00012168458711439383, 0.00012168458711439383, 0.00012168458711439383], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 10000/ 21553 | consumed samples: 2560000 | consumed tokens: 5242880000 | elapsed time per iteration (s): 0.31 | learning rate: 1.217E-04 | global batch size: 256 | lm loss: 2.654670E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.797 | TFLOPs: 29.19 | +0: steps: 10000 loss: 2.6245 iter time (s): 0.303 samples/sec: 846.047 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 10000 | lm loss value: 2.531667E+00 | lm loss PPL: 1.257446E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 10000 to checkpoints_146m +0: [2022-11-25 18:23:49,312] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step10000 is begin to save! +0: [2022-11-25 18:23:49,315] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:23:49,438] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:23:49,439] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:23:49,460] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:23:49,460] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:23:49,481] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:23:49,482] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:23:49,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:23:49,503] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:23:49,524] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:23:49,525] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:23:49,545] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:23:49,546] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:23:49,567] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:23:49,567] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:23:49,588] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:23:49,588] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:23:49,609] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:23:49,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:23:49,630] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:23:49,630] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:23:49,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:23:49,651] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:23:49,672] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:23:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:23:49,693] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:23:49,693] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:23:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:23:49,714] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:23:49,735] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:23:49,736] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:23:49,756] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:23:49,757] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:23:49,758] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:23:49,759] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step10000/mp_rank_00_model_states.pt +0: [2022-11-25 18:23:49,759] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:23:49,762] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:23:49,779] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:23:49,813] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,813] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,814] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,820] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,820] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,820] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,821] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,820] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,821] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:23:49,825] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,814] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,814] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,823] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,822] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,821] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,823] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,823] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,821] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2022-11-25 18:23:49,823] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:23:49,823] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,822] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,822] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +4: [2022-11-25 18:23:49,823] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,822] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,825] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +2: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2022-11-25 18:23:49,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,825] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,825] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2022-11-25 18:23:49,825] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2022-11-25 18:23:49,829] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:23:49,829] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:23:49,829] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,838] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,838] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,838] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,838] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:23:49,838] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,838] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2022-11-25 18:23:49,842] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:23:49,842] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: successfully saved checkpoint at iteration 10000 to checkpoints_146m +7: time (ms) | save-checkpoint: 535.84 +7: iteration 10010/ 21553 | consumed samples: 2562560 | consumed tokens: 5248122880 | elapsed time per iteration (s): 0.37 | learning rate: 1.216E-04 | global batch size: 256 | lm loss: 2.625154E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 700.155 | TFLOPs: 24.51 | +7: iteration 10020/ 21553 | consumed samples: 2565120 | consumed tokens: 5253365760 | elapsed time per iteration (s): 0.30 | learning rate: 1.214E-04 | global batch size: 256 | lm loss: 2.651059E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.239 | TFLOPs: 29.97 | +7: iteration 10030/ 21553 | consumed samples: 2567680 | consumed tokens: 5258608640 | elapsed time per iteration (s): 0.31 | learning rate: 1.213E-04 | global batch size: 256 | lm loss: 2.652482E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.849 | TFLOPs: 28.84 | +7: iteration 10040/ 21553 | consumed samples: 2570240 | consumed tokens: 5263851520 | elapsed time per iteration (s): 0.31 | learning rate: 1.212E-04 | global batch size: 256 | lm loss: 2.652297E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.208 | TFLOPs: 28.50 | +7: iteration 10050/ 21553 | consumed samples: 2572800 | consumed tokens: 5269094400 | elapsed time per iteration (s): 0.30 | learning rate: 1.210E-04 | global batch size: 256 | lm loss: 2.628030E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.568 | TFLOPs: 30.06 | +7: iteration 10060/ 21553 | consumed samples: 2575360 | consumed tokens: 5274337280 | elapsed time per iteration (s): 0.31 | learning rate: 1.209E-04 | global batch size: 256 | lm loss: 2.634057E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.986 | TFLOPs: 28.78 | +7: iteration 10070/ 21553 | consumed samples: 2577920 | consumed tokens: 5279580160 | elapsed time per iteration (s): 0.31 | learning rate: 1.208E-04 | global batch size: 256 | lm loss: 2.636545E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.860 | TFLOPs: 28.88 | +7: iteration 10080/ 21553 | consumed samples: 2580480 | consumed tokens: 5284823040 | elapsed time per iteration (s): 0.30 | learning rate: 1.206E-04 | global batch size: 256 | lm loss: 2.596674E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.306 | TFLOPs: 29.84 | +7: iteration 10090/ 21553 | consumed samples: 2583040 | consumed tokens: 5290065920 | elapsed time per iteration (s): 0.30 | learning rate: 1.205E-04 | global batch size: 256 | lm loss: 2.610798E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.990 | TFLOPs: 29.55 | +7: iteration 10100/ 21553 | consumed samples: 2585600 | consumed tokens: 5295308800 | elapsed time per iteration (s): 0.31 | learning rate: 1.204E-04 | global batch size: 256 | lm loss: 2.641601E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.232 | TFLOPs: 29.10 | +7: iteration 10110/ 21553 | consumed samples: 2588160 | consumed tokens: 5300551680 | elapsed time per iteration (s): 0.30 | learning rate: 1.202E-04 | global batch size: 256 | lm loss: 2.615639E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.573 | TFLOPs: 29.50 | +7: iteration 10120/ 21553 | consumed samples: 2590720 | consumed tokens: 5305794560 | elapsed time per iteration (s): 0.30 | learning rate: 1.201E-04 | global batch size: 256 | lm loss: 2.601298E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.641 | TFLOPs: 29.95 | +7: iteration 10130/ 21553 | consumed samples: 2593280 | consumed tokens: 5311037440 | elapsed time per iteration (s): 0.30 | learning rate: 1.200E-04 | global batch size: 256 | lm loss: 2.627557E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.116 | TFLOPs: 29.87 | +7: iteration 10140/ 21553 | consumed samples: 2595840 | consumed tokens: 5316280320 | elapsed time per iteration (s): 0.30 | learning rate: 1.198E-04 | global batch size: 256 | lm loss: 2.637991E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.373 | TFLOPs: 29.56 | +7: iteration 10150/ 21553 | consumed samples: 2598400 | consumed tokens: 5321523200 | elapsed time per iteration (s): 0.30 | learning rate: 1.197E-04 | global batch size: 256 | lm loss: 2.607922E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.693 | TFLOPs: 29.99 | +7: iteration 10160/ 21553 | consumed samples: 2600960 | consumed tokens: 5326766080 | elapsed time per iteration (s): 0.30 | learning rate: 1.196E-04 | global batch size: 256 | lm loss: 2.626657E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.520 | TFLOPs: 29.49 | +7: iteration 10170/ 21553 | consumed samples: 2603520 | consumed tokens: 5332008960 | elapsed time per iteration (s): 0.31 | learning rate: 1.194E-04 | global batch size: 256 | lm loss: 2.650312E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.047 | TFLOPs: 29.23 | +7: iteration 10180/ 21553 | consumed samples: 2606080 | consumed tokens: 5337251840 | elapsed time per iteration (s): 0.31 | learning rate: 1.193E-04 | global batch size: 256 | lm loss: 2.622596E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.172 | TFLOPs: 29.13 | +7: iteration 10190/ 21553 | consumed samples: 2608640 | consumed tokens: 5342494720 | elapsed time per iteration (s): 0.30 | learning rate: 1.192E-04 | global batch size: 256 | lm loss: 2.635567E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.591 | TFLOPs: 29.67 | +7: iteration 10200/ 21553 | consumed samples: 2611200 | consumed tokens: 5347737600 | elapsed time per iteration (s): 0.31 | learning rate: 1.191E-04 | global batch size: 256 | lm loss: 2.621061E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.114 | TFLOPs: 29.20 | +7: iteration 10210/ 21553 | consumed samples: 2613760 | consumed tokens: 5352980480 | elapsed time per iteration (s): 0.30 | learning rate: 1.189E-04 | global batch size: 256 | lm loss: 2.624627E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.603 | TFLOPs: 29.60 | +7: iteration 10220/ 21553 | consumed samples: 2616320 | consumed tokens: 5358223360 | elapsed time per iteration (s): 0.31 | learning rate: 1.188E-04 | global batch size: 256 | lm loss: 2.605620E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.475 | TFLOPs: 29.25 | +7: iteration 10230/ 21553 | consumed samples: 2618880 | consumed tokens: 5363466240 | elapsed time per iteration (s): 0.30 | learning rate: 1.187E-04 | global batch size: 256 | lm loss: 2.623222E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.470 | TFLOPs: 29.56 | +7: iteration 10240/ 21553 | consumed samples: 2621440 | consumed tokens: 5368709120 | elapsed time per iteration (s): 0.30 | learning rate: 1.185E-04 | global batch size: 256 | lm loss: 2.639627E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.736 | TFLOPs: 29.47 | +7: iteration 10250/ 21553 | consumed samples: 2624000 | consumed tokens: 5373952000 | elapsed time per iteration (s): 0.31 | learning rate: 1.184E-04 | global batch size: 256 | lm loss: 2.626737E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.562 | TFLOPs: 28.87 | +7: iteration 10260/ 21553 | consumed samples: 2626560 | consumed tokens: 5379194880 | elapsed time per iteration (s): 0.31 | learning rate: 1.183E-04 | global batch size: 256 | lm loss: 2.598085E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.746 | TFLOPs: 29.33 | +7: iteration 10270/ 21553 | consumed samples: 2629120 | consumed tokens: 5384437760 | elapsed time per iteration (s): 0.30 | learning rate: 1.181E-04 | global batch size: 256 | lm loss: 2.626188E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.580 | TFLOPs: 29.67 | +7: iteration 10280/ 21553 | consumed samples: 2631680 | consumed tokens: 5389680640 | elapsed time per iteration (s): 0.30 | learning rate: 1.180E-04 | global batch size: 256 | lm loss: 2.635999E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.233 | TFLOPs: 29.48 | +7: iteration 10290/ 21553 | consumed samples: 2634240 | consumed tokens: 5394923520 | elapsed time per iteration (s): 0.30 | learning rate: 1.179E-04 | global batch size: 256 | lm loss: 2.648325E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.772 | TFLOPs: 29.71 | +7: iteration 10300/ 21553 | consumed samples: 2636800 | consumed tokens: 5400166400 | elapsed time per iteration (s): 0.31 | learning rate: 1.177E-04 | global batch size: 256 | lm loss: 2.600035E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.310 | TFLOPs: 29.35 | +7: iteration 10310/ 21553 | consumed samples: 2639360 | consumed tokens: 5405409280 | elapsed time per iteration (s): 0.30 | learning rate: 1.176E-04 | global batch size: 256 | lm loss: 2.634328E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.697 | TFLOPs: 29.50 | +7: iteration 10320/ 21553 | consumed samples: 2641920 | consumed tokens: 5410652160 | elapsed time per iteration (s): 0.30 | learning rate: 1.175E-04 | global batch size: 256 | lm loss: 2.655027E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.916 | TFLOPs: 29.96 | +7: iteration 10330/ 21553 | consumed samples: 2644480 | consumed tokens: 5415895040 | elapsed time per iteration (s): 0.30 | learning rate: 1.173E-04 | global batch size: 256 | lm loss: 2.611024E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.094 | TFLOPs: 29.58 | +7: iteration 10340/ 21553 | consumed samples: 2647040 | consumed tokens: 5421137920 | elapsed time per iteration (s): 0.31 | learning rate: 1.172E-04 | global batch size: 256 | lm loss: 2.653582E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.331 | TFLOPs: 28.72 | +7: iteration 10350/ 21553 | consumed samples: 2649600 | consumed tokens: 5426380800 | elapsed time per iteration (s): 0.30 | learning rate: 1.171E-04 | global batch size: 256 | lm loss: 2.624432E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.057 | TFLOPs: 29.97 | +7: iteration 10360/ 21553 | consumed samples: 2652160 | consumed tokens: 5431623680 | elapsed time per iteration (s): 0.31 | learning rate: 1.169E-04 | global batch size: 256 | lm loss: 2.631681E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.738 | TFLOPs: 28.80 | +7: iteration 10370/ 21553 | consumed samples: 2654720 | consumed tokens: 5436866560 | elapsed time per iteration (s): 0.30 | learning rate: 1.168E-04 | global batch size: 256 | lm loss: 2.624523E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.140 | TFLOPs: 30.11 | +7: iteration 10380/ 21553 | consumed samples: 2657280 | consumed tokens: 5442109440 | elapsed time per iteration (s): 0.30 | learning rate: 1.167E-04 | global batch size: 256 | lm loss: 2.619565E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.327 | TFLOPs: 29.45 | +7: iteration 10390/ 21553 | consumed samples: 2659840 | consumed tokens: 5447352320 | elapsed time per iteration (s): 0.30 | learning rate: 1.165E-04 | global batch size: 256 | lm loss: 2.609818E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.511 | TFLOPs: 29.46 | +7: iteration 10400/ 21553 | consumed samples: 2662400 | consumed tokens: 5452595200 | elapsed time per iteration (s): 0.30 | learning rate: 1.164E-04 | global batch size: 256 | lm loss: 2.624136E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.028 | TFLOPs: 29.55 | +7: iteration 10410/ 21553 | consumed samples: 2664960 | consumed tokens: 5457838080 | elapsed time per iteration (s): 0.31 | learning rate: 1.163E-04 | global batch size: 256 | lm loss: 2.611964E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.247 | TFLOPs: 28.82 | +7: iteration 10420/ 21553 | consumed samples: 2667520 | consumed tokens: 5463080960 | elapsed time per iteration (s): 0.30 | learning rate: 1.161E-04 | global batch size: 256 | lm loss: 2.637428E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.543 | TFLOPs: 29.53 | +7: iteration 10430/ 21553 | consumed samples: 2670080 | consumed tokens: 5468323840 | elapsed time per iteration (s): 0.30 | learning rate: 1.160E-04 | global batch size: 256 | lm loss: 2.632319E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.486 | TFLOPs: 29.95 | +7: iteration 10440/ 21553 | consumed samples: 2672640 | consumed tokens: 5473566720 | elapsed time per iteration (s): 0.32 | learning rate: 1.159E-04 | global batch size: 256 | lm loss: 2.621014E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.231 | TFLOPs: 28.29 | +7: iteration 10450/ 21553 | consumed samples: 2675200 | consumed tokens: 5478809600 | elapsed time per iteration (s): 0.31 | learning rate: 1.158E-04 | global batch size: 256 | lm loss: 2.610257E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.245 | TFLOPs: 29.31 | +7: iteration 10460/ 21553 | consumed samples: 2677760 | consumed tokens: 5484052480 | elapsed time per iteration (s): 0.30 | learning rate: 1.156E-04 | global batch size: 256 | lm loss: 2.617804E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.319 | TFLOPs: 29.52 | +7: iteration 10470/ 21553 | consumed samples: 2680320 | consumed tokens: 5489295360 | elapsed time per iteration (s): 0.30 | learning rate: 1.155E-04 | global batch size: 256 | lm loss: 2.622336E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.481 | TFLOPs: 29.56 | +7: iteration 10480/ 21553 | consumed samples: 2682880 | consumed tokens: 5494538240 | elapsed time per iteration (s): 0.31 | learning rate: 1.154E-04 | global batch size: 256 | lm loss: 2.615744E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.986 | TFLOPs: 29.30 | +7: iteration 10490/ 21553 | consumed samples: 2685440 | consumed tokens: 5499781120 | elapsed time per iteration (s): 0.31 | learning rate: 1.152E-04 | global batch size: 256 | lm loss: 2.611610E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.577 | TFLOPs: 28.83 | +7: iteration 10500/ 21553 | consumed samples: 2688000 | consumed tokens: 5505024000 | elapsed time per iteration (s): 0.30 | learning rate: 1.151E-04 | global batch size: 256 | lm loss: 2.615194E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.488 | TFLOPs: 29.81 | +7: iteration 10510/ 21553 | consumed samples: 2690560 | consumed tokens: 5510266880 | elapsed time per iteration (s): 0.30 | learning rate: 1.150E-04 | global batch size: 256 | lm loss: 2.633099E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.982 | TFLOPs: 30.11 | +7: iteration 10520/ 21553 | consumed samples: 2693120 | consumed tokens: 5515509760 | elapsed time per iteration (s): 0.31 | learning rate: 1.148E-04 | global batch size: 256 | lm loss: 2.666467E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.649 | TFLOPs: 28.66 | +7: iteration 10530/ 21553 | consumed samples: 2695680 | consumed tokens: 5520752640 | elapsed time per iteration (s): 0.31 | learning rate: 1.147E-04 | global batch size: 256 | lm loss: 2.618620E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.409 | TFLOPs: 29.21 | +7: iteration 10540/ 21553 | consumed samples: 2698240 | consumed tokens: 5525995520 | elapsed time per iteration (s): 0.32 | learning rate: 1.146E-04 | global batch size: 256 | lm loss: 2.612224E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.070 | TFLOPs: 28.25 | +7: iteration 10550/ 21553 | consumed samples: 2700800 | consumed tokens: 5531238400 | elapsed time per iteration (s): 0.30 | learning rate: 1.144E-04 | global batch size: 256 | lm loss: 2.600134E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.064 | TFLOPs: 30.00 | +7: iteration 10560/ 21553 | consumed samples: 2703360 | consumed tokens: 5536481280 | elapsed time per iteration (s): 0.30 | learning rate: 1.143E-04 | global batch size: 256 | lm loss: 2.608535E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.210 | TFLOPs: 29.59 | +7: iteration 10570/ 21553 | consumed samples: 2705920 | consumed tokens: 5541724160 | elapsed time per iteration (s): 0.31 | learning rate: 1.142E-04 | global batch size: 256 | lm loss: 2.606795E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.739 | TFLOPs: 29.33 | +7: iteration 10580/ 21553 | consumed samples: 2708480 | consumed tokens: 5546967040 | elapsed time per iteration (s): 0.30 | learning rate: 1.140E-04 | global batch size: 256 | lm loss: 2.613671E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.419 | TFLOPs: 29.67 | +7: iteration 10590/ 21553 | consumed samples: 2711040 | consumed tokens: 5552209920 | elapsed time per iteration (s): 0.30 | learning rate: 1.139E-04 | global batch size: 256 | lm loss: 2.647448E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.637 | TFLOPs: 29.43 | +7: iteration 10600/ 21553 | consumed samples: 2713600 | consumed tokens: 5557452800 | elapsed time per iteration (s): 0.31 | learning rate: 1.138E-04 | global batch size: 256 | lm loss: 2.607087E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.161 | TFLOPs: 29.31 | +7: iteration 10610/ 21553 | consumed samples: 2716160 | consumed tokens: 5562695680 | elapsed time per iteration (s): 0.30 | learning rate: 1.136E-04 | global batch size: 256 | lm loss: 2.588953E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.088 | TFLOPs: 29.62 | +7: iteration 10620/ 21553 | consumed samples: 2718720 | consumed tokens: 5567938560 | elapsed time per iteration (s): 0.30 | learning rate: 1.135E-04 | global batch size: 256 | lm loss: 2.607251E+00 | grad norm: 0.345 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.714 | TFLOPs: 29.89 | +7: iteration 10630/ 21553 | consumed samples: 2721280 | consumed tokens: 5573181440 | elapsed time per iteration (s): 0.31 | learning rate: 1.134E-04 | global batch size: 256 | lm loss: 2.589243E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.198 | TFLOPs: 29.34 | +7: iteration 10640/ 21553 | consumed samples: 2723840 | consumed tokens: 5578424320 | elapsed time per iteration (s): 0.31 | learning rate: 1.132E-04 | global batch size: 256 | lm loss: 2.620601E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.766 | TFLOPs: 28.94 | +7: iteration 10650/ 21553 | consumed samples: 2726400 | consumed tokens: 5583667200 | elapsed time per iteration (s): 0.30 | learning rate: 1.131E-04 | global batch size: 256 | lm loss: 2.627037E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.446 | TFLOPs: 29.46 | +7: iteration 10660/ 21553 | consumed samples: 2728960 | consumed tokens: 5588910080 | elapsed time per iteration (s): 0.30 | learning rate: 1.130E-04 | global batch size: 256 | lm loss: 2.621474E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.064 | TFLOPs: 29.51 | +7: iteration 10670/ 21553 | consumed samples: 2731520 | consumed tokens: 5594152960 | elapsed time per iteration (s): 0.30 | learning rate: 1.128E-04 | global batch size: 256 | lm loss: 2.639116E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.928 | TFLOPs: 29.68 | +7: iteration 10680/ 21553 | consumed samples: 2734080 | consumed tokens: 5599395840 | elapsed time per iteration (s): 0.31 | learning rate: 1.127E-04 | global batch size: 256 | lm loss: 2.612581E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.990 | TFLOPs: 29.13 | +7: iteration 10690/ 21553 | consumed samples: 2736640 | consumed tokens: 5604638720 | elapsed time per iteration (s): 0.30 | learning rate: 1.126E-04 | global batch size: 256 | lm loss: 2.628524E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.078 | TFLOPs: 29.76 | +7: iteration 10700/ 21553 | consumed samples: 2739200 | consumed tokens: 5609881600 | elapsed time per iteration (s): 0.30 | learning rate: 1.124E-04 | global batch size: 256 | lm loss: 2.611903E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.875 | TFLOPs: 29.40 | +7: iteration 10710/ 21553 | consumed samples: 2741760 | consumed tokens: 5615124480 | elapsed time per iteration (s): 0.30 | learning rate: 1.123E-04 | global batch size: 256 | lm loss: 2.634899E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.530 | TFLOPs: 29.70 | +7: iteration 10720/ 21553 | consumed samples: 2744320 | consumed tokens: 5620367360 | elapsed time per iteration (s): 0.30 | learning rate: 1.122E-04 | global batch size: 256 | lm loss: 2.644517E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.065 | TFLOPs: 29.62 | +7: iteration 10730/ 21553 | consumed samples: 2746880 | consumed tokens: 5625610240 | elapsed time per iteration (s): 0.30 | learning rate: 1.120E-04 | global batch size: 256 | lm loss: 2.610460E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.566 | TFLOPs: 29.67 | +7: iteration 10740/ 21553 | consumed samples: 2749440 | consumed tokens: 5630853120 | elapsed time per iteration (s): 0.30 | learning rate: 1.119E-04 | global batch size: 256 | lm loss: 2.589349E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.432 | TFLOPs: 29.42 | +7: iteration 10750/ 21553 | consumed samples: 2752000 | consumed tokens: 5636096000 | elapsed time per iteration (s): 0.30 | learning rate: 1.118E-04 | global batch size: 256 | lm loss: 2.616341E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.073 | TFLOPs: 29.93 | +7: iteration 10760/ 21553 | consumed samples: 2754560 | consumed tokens: 5641338880 | elapsed time per iteration (s): 0.30 | learning rate: 1.116E-04 | global batch size: 256 | lm loss: 2.619981E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.682 | TFLOPs: 29.57 | +7: iteration 10770/ 21553 | consumed samples: 2757120 | consumed tokens: 5646581760 | elapsed time per iteration (s): 0.31 | learning rate: 1.115E-04 | global batch size: 256 | lm loss: 2.582827E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.811 | TFLOPs: 28.91 | +7: iteration 10780/ 21553 | consumed samples: 2759680 | consumed tokens: 5651824640 | elapsed time per iteration (s): 0.30 | learning rate: 1.114E-04 | global batch size: 256 | lm loss: 2.575685E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.242 | TFLOPs: 29.59 | +7: iteration 10790/ 21553 | consumed samples: 2762240 | consumed tokens: 5657067520 | elapsed time per iteration (s): 0.30 | learning rate: 1.112E-04 | global batch size: 256 | lm loss: 2.626087E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.415 | TFLOPs: 29.81 | +7: iteration 10800/ 21553 | consumed samples: 2764800 | consumed tokens: 5662310400 | elapsed time per iteration (s): 0.32 | learning rate: 1.111E-04 | global batch size: 256 | lm loss: 2.588220E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.239 | TFLOPs: 28.01 | +7: iteration 10810/ 21553 | consumed samples: 2767360 | consumed tokens: 5667553280 | elapsed time per iteration (s): 0.30 | learning rate: 1.110E-04 | global batch size: 256 | lm loss: 2.598022E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.159 | TFLOPs: 29.62 | +7: iteration 10820/ 21553 | consumed samples: 2769920 | consumed tokens: 5672796160 | elapsed time per iteration (s): 0.30 | learning rate: 1.109E-04 | global batch size: 256 | lm loss: 2.605686E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.058 | TFLOPs: 29.72 | +7: iteration 10830/ 21553 | consumed samples: 2772480 | consumed tokens: 5678039040 | elapsed time per iteration (s): 0.30 | learning rate: 1.107E-04 | global batch size: 256 | lm loss: 2.613777E+00 | grad norm: 0.315 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.541 | TFLOPs: 29.81 | +7: iteration 10840/ 21553 | consumed samples: 2775040 | consumed tokens: 5683281920 | elapsed time per iteration (s): 0.30 | learning rate: 1.106E-04 | global batch size: 256 | lm loss: 2.606325E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.300 | TFLOPs: 29.77 | +7: iteration 10850/ 21553 | consumed samples: 2777600 | consumed tokens: 5688524800 | elapsed time per iteration (s): 0.30 | learning rate: 1.105E-04 | global batch size: 256 | lm loss: 2.571762E+00 | grad norm: 0.319 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.473 | TFLOPs: 29.77 | +7: iteration 10860/ 21553 | consumed samples: 2780160 | consumed tokens: 5693767680 | elapsed time per iteration (s): 0.31 | learning rate: 1.103E-04 | global batch size: 256 | lm loss: 2.639084E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.017 | TFLOPs: 29.37 | +7: iteration 10870/ 21553 | consumed samples: 2782720 | consumed tokens: 5699010560 | elapsed time per iteration (s): 0.31 | learning rate: 1.102E-04 | global batch size: 256 | lm loss: 2.615474E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.765 | TFLOPs: 29.26 | +7: iteration 10880/ 21553 | consumed samples: 2785280 | consumed tokens: 5704253440 | elapsed time per iteration (s): 0.30 | learning rate: 1.101E-04 | global batch size: 256 | lm loss: 2.629205E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.503 | TFLOPs: 29.49 | +7: iteration 10890/ 21553 | consumed samples: 2787840 | consumed tokens: 5709496320 | elapsed time per iteration (s): 0.30 | learning rate: 1.099E-04 | global batch size: 256 | lm loss: 2.610460E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.919 | TFLOPs: 29.79 | +7: iteration 10900/ 21553 | consumed samples: 2790400 | consumed tokens: 5714739200 | elapsed time per iteration (s): 0.30 | learning rate: 1.098E-04 | global batch size: 256 | lm loss: 2.602619E+00 | grad norm: 0.334 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.224 | TFLOPs: 30.04 | +7: iteration 10910/ 21553 | consumed samples: 2792960 | consumed tokens: 5719982080 | elapsed time per iteration (s): 0.30 | learning rate: 1.097E-04 | global batch size: 256 | lm loss: 2.637338E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.418 | TFLOPs: 29.46 | +7: iteration 10920/ 21553 | consumed samples: 2795520 | consumed tokens: 5725224960 | elapsed time per iteration (s): 0.30 | learning rate: 1.095E-04 | global batch size: 256 | lm loss: 2.584002E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.373 | TFLOPs: 29.80 | +7: iteration 10930/ 21553 | consumed samples: 2798080 | consumed tokens: 5730467840 | elapsed time per iteration (s): 0.30 | learning rate: 1.094E-04 | global batch size: 256 | lm loss: 2.594339E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.678 | TFLOPs: 29.92 | +7: iteration 10940/ 21553 | consumed samples: 2800640 | consumed tokens: 5735710720 | elapsed time per iteration (s): 0.31 | learning rate: 1.093E-04 | global batch size: 256 | lm loss: 2.565329E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.377 | TFLOPs: 28.61 | +7: iteration 10950/ 21553 | consumed samples: 2803200 | consumed tokens: 5740953600 | elapsed time per iteration (s): 0.30 | learning rate: 1.091E-04 | global batch size: 256 | lm loss: 2.628182E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.726 | TFLOPs: 29.75 | +7: iteration 10960/ 21553 | consumed samples: 2805760 | consumed tokens: 5746196480 | elapsed time per iteration (s): 0.30 | learning rate: 1.090E-04 | global batch size: 256 | lm loss: 2.599510E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.190 | TFLOPs: 29.52 | +7: iteration 10970/ 21553 | consumed samples: 2808320 | consumed tokens: 5751439360 | elapsed time per iteration (s): 0.31 | learning rate: 1.089E-04 | global batch size: 256 | lm loss: 2.606549E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.869 | TFLOPs: 28.91 | +7: iteration 10980/ 21553 | consumed samples: 2810880 | consumed tokens: 5756682240 | elapsed time per iteration (s): 0.32 | learning rate: 1.087E-04 | global batch size: 256 | lm loss: 2.602980E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.932 | TFLOPs: 28.28 | +7: iteration 10990/ 21553 | consumed samples: 2813440 | consumed tokens: 5761925120 | elapsed time per iteration (s): 0.30 | learning rate: 1.086E-04 | global batch size: 256 | lm loss: 2.613717E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.769 | TFLOPs: 29.40 | +7: iteration 11000/ 21553 | consumed samples: 2816000 | consumed tokens: 5767168000 | elapsed time per iteration (s): 0.31 | learning rate: 1.085E-04 | global batch size: 256 | lm loss: 2.612201E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.091 | TFLOPs: 28.50 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 11000 | lm loss value: 2.516994E+00 | lm loss PPL: 1.239129E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 11000 to checkpoints_146m +0: [2022-11-25 18:28:54,581] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step11000 is begin to save! +0: [2022-11-25 18:28:54,585] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:28:54,667] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:28:54,667] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:28:54,683] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:28:54,683] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:28:54,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:28:54,698] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:28:54,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:28:54,713] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:28:54,728] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:28:54,728] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:28:54,743] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:28:54,743] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:28:54,757] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:28:54,757] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:28:54,772] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:28:54,772] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:28:54,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:28:54,787] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:28:54,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:28:54,802] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:28:54,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:28:54,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:28:54,831] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:28:54,831] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:28:54,845] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:28:54,846] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:28:54,860] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:28:54,861] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:28:54,875] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:28:54,876] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:28:54,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:28:54,891] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:28:54,892] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:28:54,892] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step11000/mp_rank_00_model_states.pt +0: [2022-11-25 18:28:54,892] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:28:54,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:28:54,912] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:28:54,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,944] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,944] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,944] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,947] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,945] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,945] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,947] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:28:54,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:28:54,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2022-11-25 18:28:54,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:28:54,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2022-11-25 18:28:54,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:28:54,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:28:54,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2022-11-25 18:28:54,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:28:54,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,958] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,958] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,958] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2022-11-25 18:28:54,958] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:28:54,958] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:28:54,959] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2022-11-25 18:28:54,967] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:28:54,967] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: successfully saved checkpoint at iteration 11000 to checkpoints_146m +7: time (ms) | save-checkpoint: 393.41 +7: iteration 11010/ 21553 | consumed samples: 2818560 | consumed tokens: 5772410880 | elapsed time per iteration (s): 0.38 | learning rate: 1.083E-04 | global batch size: 256 | lm loss: 2.589563E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 681.392 | TFLOPs: 23.85 | +7: iteration 11020/ 21553 | consumed samples: 2821120 | consumed tokens: 5777653760 | elapsed time per iteration (s): 0.33 | learning rate: 1.082E-04 | global batch size: 256 | lm loss: 2.643732E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 779.468 | TFLOPs: 27.29 | +7: iteration 11030/ 21553 | consumed samples: 2823680 | consumed tokens: 5782896640 | elapsed time per iteration (s): 0.32 | learning rate: 1.081E-04 | global batch size: 256 | lm loss: 2.577187E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.495 | TFLOPs: 28.06 | +7: iteration 11040/ 21553 | consumed samples: 2826240 | consumed tokens: 5788139520 | elapsed time per iteration (s): 0.31 | learning rate: 1.079E-04 | global batch size: 256 | lm loss: 2.607040E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.730 | TFLOPs: 29.12 | +7: iteration 11050/ 21553 | consumed samples: 2828800 | consumed tokens: 5793382400 | elapsed time per iteration (s): 0.30 | learning rate: 1.078E-04 | global batch size: 256 | lm loss: 2.622644E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.073 | TFLOPs: 29.55 | +7: iteration 11060/ 21553 | consumed samples: 2831360 | consumed tokens: 5798625280 | elapsed time per iteration (s): 0.31 | learning rate: 1.077E-04 | global batch size: 256 | lm loss: 2.583331E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.596 | TFLOPs: 29.22 | +7: iteration 11070/ 21553 | consumed samples: 2833920 | consumed tokens: 5803868160 | elapsed time per iteration (s): 0.30 | learning rate: 1.075E-04 | global batch size: 256 | lm loss: 2.597362E+00 | grad norm: 0.318 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.492 | TFLOPs: 29.67 | +7: iteration 11080/ 21553 | consumed samples: 2836480 | consumed tokens: 5809111040 | elapsed time per iteration (s): 0.31 | learning rate: 1.074E-04 | global batch size: 256 | lm loss: 2.619227E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.056 | TFLOPs: 29.16 | +7: iteration 11090/ 21553 | consumed samples: 2839040 | consumed tokens: 5814353920 | elapsed time per iteration (s): 0.30 | learning rate: 1.073E-04 | global batch size: 256 | lm loss: 2.597601E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.354 | TFLOPs: 29.73 | +7: iteration 11100/ 21553 | consumed samples: 2841600 | consumed tokens: 5819596800 | elapsed time per iteration (s): 0.30 | learning rate: 1.071E-04 | global batch size: 256 | lm loss: 2.624665E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.267 | TFLOPs: 29.66 | +7: iteration 11110/ 21553 | consumed samples: 2844160 | consumed tokens: 5824839680 | elapsed time per iteration (s): 0.30 | learning rate: 1.070E-04 | global batch size: 256 | lm loss: 2.633682E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.394 | TFLOPs: 29.73 | +7: iteration 11120/ 21553 | consumed samples: 2846720 | consumed tokens: 5830082560 | elapsed time per iteration (s): 0.31 | learning rate: 1.069E-04 | global batch size: 256 | lm loss: 2.618224E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.281 | TFLOPs: 29.24 | +7: iteration 11130/ 21553 | consumed samples: 2849280 | consumed tokens: 5835325440 | elapsed time per iteration (s): 0.30 | learning rate: 1.067E-04 | global batch size: 256 | lm loss: 2.581337E+00 | grad norm: 0.329 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.169 | TFLOPs: 29.59 | +7: iteration 11140/ 21553 | consumed samples: 2851840 | consumed tokens: 5840568320 | elapsed time per iteration (s): 0.31 | learning rate: 1.066E-04 | global batch size: 256 | lm loss: 2.608457E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.229 | TFLOPs: 28.71 | +7: iteration 11150/ 21553 | consumed samples: 2854400 | consumed tokens: 5845811200 | elapsed time per iteration (s): 0.30 | learning rate: 1.065E-04 | global batch size: 256 | lm loss: 2.595848E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.646 | TFLOPs: 29.88 | +7: iteration 11160/ 21553 | consumed samples: 2856960 | consumed tokens: 5851054080 | elapsed time per iteration (s): 0.31 | learning rate: 1.063E-04 | global batch size: 256 | lm loss: 2.614627E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.379 | TFLOPs: 29.28 | +7: iteration 11170/ 21553 | consumed samples: 2859520 | consumed tokens: 5856296960 | elapsed time per iteration (s): 0.31 | learning rate: 1.062E-04 | global batch size: 256 | lm loss: 2.604108E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.742 | TFLOPs: 28.91 | +7: iteration 11180/ 21553 | consumed samples: 2862080 | consumed tokens: 5861539840 | elapsed time per iteration (s): 0.30 | learning rate: 1.061E-04 | global batch size: 256 | lm loss: 2.609578E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.617 | TFLOPs: 29.43 | +7: iteration 11190/ 21553 | consumed samples: 2864640 | consumed tokens: 5866782720 | elapsed time per iteration (s): 0.32 | learning rate: 1.060E-04 | global batch size: 256 | lm loss: 2.554633E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 803.191 | TFLOPs: 28.12 | +7: iteration 11200/ 21553 | consumed samples: 2867200 | consumed tokens: 5872025600 | elapsed time per iteration (s): 0.30 | learning rate: 1.058E-04 | global batch size: 256 | lm loss: 2.650439E+00 | grad norm: 0.263 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.178 | TFLOPs: 29.69 | +7: iteration 11210/ 21553 | consumed samples: 2869760 | consumed tokens: 5877268480 | elapsed time per iteration (s): 0.30 | learning rate: 1.057E-04 | global batch size: 256 | lm loss: 2.609118E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.921 | TFLOPs: 29.68 | +7: iteration 11220/ 21553 | consumed samples: 2872320 | consumed tokens: 5882511360 | elapsed time per iteration (s): 0.31 | learning rate: 1.056E-04 | global batch size: 256 | lm loss: 2.633778E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.713 | TFLOPs: 29.33 | +7: iteration 11230/ 21553 | consumed samples: 2874880 | consumed tokens: 5887754240 | elapsed time per iteration (s): 0.30 | learning rate: 1.054E-04 | global batch size: 256 | lm loss: 2.616763E+00 | grad norm: 0.264 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.215 | TFLOPs: 29.59 | +7: iteration 11240/ 21553 | consumed samples: 2877440 | consumed tokens: 5892997120 | elapsed time per iteration (s): 0.30 | learning rate: 1.053E-04 | global batch size: 256 | lm loss: 2.626945E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.711 | TFLOPs: 29.47 | +7: iteration 11250/ 21553 | consumed samples: 2880000 | consumed tokens: 5898240000 | elapsed time per iteration (s): 0.30 | learning rate: 1.052E-04 | global batch size: 256 | lm loss: 2.617523E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.560 | TFLOPs: 29.53 | +7: iteration 11260/ 21553 | consumed samples: 2882560 | consumed tokens: 5903482880 | elapsed time per iteration (s): 0.31 | learning rate: 1.050E-04 | global batch size: 256 | lm loss: 2.601171E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.394 | TFLOPs: 29.24 | +7: iteration 11270/ 21553 | consumed samples: 2885120 | consumed tokens: 5908725760 | elapsed time per iteration (s): 0.30 | learning rate: 1.049E-04 | global batch size: 256 | lm loss: 2.607209E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.085 | TFLOPs: 29.93 | +7: iteration 11280/ 21553 | consumed samples: 2887680 | consumed tokens: 5913968640 | elapsed time per iteration (s): 0.30 | learning rate: 1.048E-04 | global batch size: 256 | lm loss: 2.639464E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.883 | TFLOPs: 29.51 | +7: iteration 11290/ 21553 | consumed samples: 2890240 | consumed tokens: 5919211520 | elapsed time per iteration (s): 0.31 | learning rate: 1.046E-04 | global batch size: 256 | lm loss: 2.595852E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.020 | TFLOPs: 28.60 | +7: iteration 11300/ 21553 | consumed samples: 2892800 | consumed tokens: 5924454400 | elapsed time per iteration (s): 0.30 | learning rate: 1.045E-04 | global batch size: 256 | lm loss: 2.582059E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.610 | TFLOPs: 29.74 | +7: iteration 11310/ 21553 | consumed samples: 2895360 | consumed tokens: 5929697280 | elapsed time per iteration (s): 0.30 | learning rate: 1.044E-04 | global batch size: 256 | lm loss: 2.592768E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.751 | TFLOPs: 29.61 | +7: iteration 11320/ 21553 | consumed samples: 2897920 | consumed tokens: 5934940160 | elapsed time per iteration (s): 0.31 | learning rate: 1.042E-04 | global batch size: 256 | lm loss: 2.588853E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.656 | TFLOPs: 28.94 | +7: iteration 11330/ 21553 | consumed samples: 2900480 | consumed tokens: 5940183040 | elapsed time per iteration (s): 0.30 | learning rate: 1.041E-04 | global batch size: 256 | lm loss: 2.585718E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.807 | TFLOPs: 29.43 | +7: iteration 11340/ 21553 | consumed samples: 2903040 | consumed tokens: 5945425920 | elapsed time per iteration (s): 0.30 | learning rate: 1.040E-04 | global batch size: 256 | lm loss: 2.607607E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.731 | TFLOPs: 30.03 | +7: iteration 11350/ 21553 | consumed samples: 2905600 | consumed tokens: 5950668800 | elapsed time per iteration (s): 0.30 | learning rate: 1.038E-04 | global batch size: 256 | lm loss: 2.612529E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.101 | TFLOPs: 29.55 | +7: iteration 11360/ 21553 | consumed samples: 2908160 | consumed tokens: 5955911680 | elapsed time per iteration (s): 0.31 | learning rate: 1.037E-04 | global batch size: 256 | lm loss: 2.627950E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.795 | TFLOPs: 29.33 | +7: iteration 11370/ 21553 | consumed samples: 2910720 | consumed tokens: 5961154560 | elapsed time per iteration (s): 0.30 | learning rate: 1.036E-04 | global batch size: 256 | lm loss: 2.605774E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.668 | TFLOPs: 29.78 | +7: iteration 11380/ 21553 | consumed samples: 2913280 | consumed tokens: 5966397440 | elapsed time per iteration (s): 0.30 | learning rate: 1.034E-04 | global batch size: 256 | lm loss: 2.600987E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.365 | TFLOPs: 29.77 | +7: iteration 11390/ 21553 | consumed samples: 2915840 | consumed tokens: 5971640320 | elapsed time per iteration (s): 0.30 | learning rate: 1.033E-04 | global batch size: 256 | lm loss: 2.593010E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.582 | TFLOPs: 29.81 | +7: iteration 11400/ 21553 | consumed samples: 2918400 | consumed tokens: 5976883200 | elapsed time per iteration (s): 0.30 | learning rate: 1.032E-04 | global batch size: 256 | lm loss: 2.633270E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.811 | TFLOPs: 29.61 | +7: iteration 11410/ 21553 | consumed samples: 2920960 | consumed tokens: 5982126080 | elapsed time per iteration (s): 0.31 | learning rate: 1.030E-04 | global batch size: 256 | lm loss: 2.571841E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.906 | TFLOPs: 28.63 | +7: iteration 11420/ 21553 | consumed samples: 2923520 | consumed tokens: 5987368960 | elapsed time per iteration (s): 0.30 | learning rate: 1.029E-04 | global batch size: 256 | lm loss: 2.570444E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.823 | TFLOPs: 29.82 | +7: iteration 11430/ 21553 | consumed samples: 2926080 | consumed tokens: 5992611840 | elapsed time per iteration (s): 0.30 | learning rate: 1.028E-04 | global batch size: 256 | lm loss: 2.590953E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.825 | TFLOPs: 29.54 | +7: iteration 11440/ 21553 | consumed samples: 2928640 | consumed tokens: 5997854720 | elapsed time per iteration (s): 0.31 | learning rate: 1.026E-04 | global batch size: 256 | lm loss: 2.620933E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.004 | TFLOPs: 29.37 | +7: iteration 11450/ 21553 | consumed samples: 2931200 | consumed tokens: 6003097600 | elapsed time per iteration (s): 0.30 | learning rate: 1.025E-04 | global batch size: 256 | lm loss: 2.578906E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.403 | TFLOPs: 29.81 | +7: iteration 11460/ 21553 | consumed samples: 2933760 | consumed tokens: 6008340480 | elapsed time per iteration (s): 0.31 | learning rate: 1.024E-04 | global batch size: 256 | lm loss: 2.598417E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.195 | TFLOPs: 29.03 | +7: iteration 11470/ 21553 | consumed samples: 2936320 | consumed tokens: 6013583360 | elapsed time per iteration (s): 0.32 | learning rate: 1.022E-04 | global batch size: 256 | lm loss: 2.618549E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.527 | TFLOPs: 28.44 | +7: iteration 11480/ 21553 | consumed samples: 2938880 | consumed tokens: 6018826240 | elapsed time per iteration (s): 0.30 | learning rate: 1.021E-04 | global batch size: 256 | lm loss: 2.627407E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.314 | TFLOPs: 29.70 | +7: iteration 11490/ 21553 | consumed samples: 2941440 | consumed tokens: 6024069120 | elapsed time per iteration (s): 0.32 | learning rate: 1.020E-04 | global batch size: 256 | lm loss: 2.611620E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 803.308 | TFLOPs: 28.12 | +7: iteration 11500/ 21553 | consumed samples: 2944000 | consumed tokens: 6029312000 | elapsed time per iteration (s): 0.30 | learning rate: 1.019E-04 | global batch size: 256 | lm loss: 2.626348E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.949 | TFLOPs: 29.82 | +7: iteration 11510/ 21553 | consumed samples: 2946560 | consumed tokens: 6034554880 | elapsed time per iteration (s): 0.35 | learning rate: 1.017E-04 | global batch size: 256 | lm loss: 2.623844E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 724.006 | TFLOPs: 25.35 | +7: iteration 11520/ 21553 | consumed samples: 2949120 | consumed tokens: 6039797760 | elapsed time per iteration (s): 0.31 | learning rate: 1.016E-04 | global batch size: 256 | lm loss: 2.599716E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.714 | TFLOPs: 29.33 | +7: iteration 11530/ 21553 | consumed samples: 2951680 | consumed tokens: 6045040640 | elapsed time per iteration (s): 0.30 | learning rate: 1.015E-04 | global batch size: 256 | lm loss: 2.570422E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.729 | TFLOPs: 29.85 | +7: iteration 11540/ 21553 | consumed samples: 2954240 | consumed tokens: 6050283520 | elapsed time per iteration (s): 0.31 | learning rate: 1.013E-04 | global batch size: 256 | lm loss: 2.618689E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.086 | TFLOPs: 29.23 | +7: iteration 11550/ 21553 | consumed samples: 2956800 | consumed tokens: 6055526400 | elapsed time per iteration (s): 0.30 | learning rate: 1.012E-04 | global batch size: 256 | lm loss: 2.615455E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.629 | TFLOPs: 30.06 | +7: iteration 11560/ 21553 | consumed samples: 2959360 | consumed tokens: 6060769280 | elapsed time per iteration (s): 0.30 | learning rate: 1.011E-04 | global batch size: 256 | lm loss: 2.613397E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.202 | TFLOPs: 29.48 | +7: iteration 11570/ 21553 | consumed samples: 2961920 | consumed tokens: 6066012160 | elapsed time per iteration (s): 0.30 | learning rate: 1.009E-04 | global batch size: 256 | lm loss: 2.592150E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.429 | TFLOPs: 29.81 | +7: iteration 11580/ 21553 | consumed samples: 2964480 | consumed tokens: 6071255040 | elapsed time per iteration (s): 0.30 | learning rate: 1.008E-04 | global batch size: 256 | lm loss: 2.572529E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.994 | TFLOPs: 29.44 | +7: iteration 11590/ 21553 | consumed samples: 2967040 | consumed tokens: 6076497920 | elapsed time per iteration (s): 0.31 | learning rate: 1.007E-04 | global batch size: 256 | lm loss: 2.607141E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.276 | TFLOPs: 29.17 | +7: iteration 11600/ 21553 | consumed samples: 2969600 | consumed tokens: 6081740800 | elapsed time per iteration (s): 0.30 | learning rate: 1.005E-04 | global batch size: 256 | lm loss: 2.609544E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.527 | TFLOPs: 29.56 | +7: iteration 11610/ 21553 | consumed samples: 2972160 | consumed tokens: 6086983680 | elapsed time per iteration (s): 0.30 | learning rate: 1.004E-04 | global batch size: 256 | lm loss: 2.610488E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.310 | TFLOPs: 29.63 | +7: iteration 11620/ 21553 | consumed samples: 2974720 | consumed tokens: 6092226560 | elapsed time per iteration (s): 0.31 | learning rate: 1.003E-04 | global batch size: 256 | lm loss: 2.638314E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.512 | TFLOPs: 29.14 | +7: iteration 11630/ 21553 | consumed samples: 2977280 | consumed tokens: 6097469440 | elapsed time per iteration (s): 0.30 | learning rate: 1.001E-04 | global batch size: 256 | lm loss: 2.586383E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.591 | TFLOPs: 29.46 | +7: iteration 11640/ 21553 | consumed samples: 2979840 | consumed tokens: 6102712320 | elapsed time per iteration (s): 0.31 | learning rate: 1.000E-04 | global batch size: 256 | lm loss: 2.621054E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.255 | TFLOPs: 29.34 | +7: iteration 11650/ 21553 | consumed samples: 2982400 | consumed tokens: 6107955200 | elapsed time per iteration (s): 0.31 | learning rate: 9.987E-05 | global batch size: 256 | lm loss: 2.571589E+00 | grad norm: 0.327 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.317 | TFLOPs: 29.31 | +7: iteration 11660/ 21553 | consumed samples: 2984960 | consumed tokens: 6113198080 | elapsed time per iteration (s): 0.31 | learning rate: 9.974E-05 | global batch size: 256 | lm loss: 2.604666E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.706 | TFLOPs: 29.33 | +7: iteration 11670/ 21553 | consumed samples: 2987520 | consumed tokens: 6118440960 | elapsed time per iteration (s): 0.30 | learning rate: 9.961E-05 | global batch size: 256 | lm loss: 2.580572E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.509 | TFLOPs: 29.49 | +7: iteration 11680/ 21553 | consumed samples: 2990080 | consumed tokens: 6123683840 | elapsed time per iteration (s): 0.31 | learning rate: 9.948E-05 | global batch size: 256 | lm loss: 2.624378E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.722 | TFLOPs: 29.29 | +7: iteration 11690/ 21553 | consumed samples: 2992640 | consumed tokens: 6128926720 | elapsed time per iteration (s): 0.31 | learning rate: 9.935E-05 | global batch size: 256 | lm loss: 2.617675E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.017 | TFLOPs: 29.27 | +7: iteration 11700/ 21553 | consumed samples: 2995200 | consumed tokens: 6134169600 | elapsed time per iteration (s): 0.30 | learning rate: 9.922E-05 | global batch size: 256 | lm loss: 2.578833E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.031 | TFLOPs: 29.51 | +7: iteration 11710/ 21553 | consumed samples: 2997760 | consumed tokens: 6139412480 | elapsed time per iteration (s): 0.30 | learning rate: 9.909E-05 | global batch size: 256 | lm loss: 2.602187E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.524 | TFLOPs: 29.77 | +7: iteration 11720/ 21553 | consumed samples: 3000320 | consumed tokens: 6144655360 | elapsed time per iteration (s): 0.30 | learning rate: 9.895E-05 | global batch size: 256 | lm loss: 2.590317E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.509 | TFLOPs: 29.98 | +7: iteration 11730/ 21553 | consumed samples: 3002880 | consumed tokens: 6149898240 | elapsed time per iteration (s): 0.31 | learning rate: 9.882E-05 | global batch size: 256 | lm loss: 2.590113E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.778 | TFLOPs: 29.26 | +7: iteration 11740/ 21553 | consumed samples: 3005440 | consumed tokens: 6155141120 | elapsed time per iteration (s): 0.32 | learning rate: 9.869E-05 | global batch size: 256 | lm loss: 2.600140E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.977 | TFLOPs: 28.29 | +7: iteration 11750/ 21553 | consumed samples: 3008000 | consumed tokens: 6160384000 | elapsed time per iteration (s): 0.31 | learning rate: 9.856E-05 | global batch size: 256 | lm loss: 2.569343E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.219 | TFLOPs: 29.38 | +7: iteration 11760/ 21553 | consumed samples: 3010560 | consumed tokens: 6165626880 | elapsed time per iteration (s): 0.30 | learning rate: 9.843E-05 | global batch size: 256 | lm loss: 2.606646E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.873 | TFLOPs: 29.58 | +7: iteration 11770/ 21553 | consumed samples: 3013120 | consumed tokens: 6170869760 | elapsed time per iteration (s): 0.30 | learning rate: 9.830E-05 | global batch size: 256 | lm loss: 2.614967E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.283 | TFLOPs: 29.98 | +7: iteration 11780/ 21553 | consumed samples: 3015680 | consumed tokens: 6176112640 | elapsed time per iteration (s): 0.30 | learning rate: 9.817E-05 | global batch size: 256 | lm loss: 2.578937E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.113 | TFLOPs: 29.48 | +7: iteration 11790/ 21553 | consumed samples: 3018240 | consumed tokens: 6181355520 | elapsed time per iteration (s): 0.30 | learning rate: 9.803E-05 | global batch size: 256 | lm loss: 2.591706E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.226 | TFLOPs: 29.59 | +7: iteration 11800/ 21553 | consumed samples: 3020800 | consumed tokens: 6186598400 | elapsed time per iteration (s): 0.30 | learning rate: 9.790E-05 | global batch size: 256 | lm loss: 2.594373E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.328 | TFLOPs: 29.59 | +7: iteration 11810/ 21553 | consumed samples: 3023360 | consumed tokens: 6191841280 | elapsed time per iteration (s): 0.31 | learning rate: 9.777E-05 | global batch size: 256 | lm loss: 2.580364E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.674 | TFLOPs: 29.18 | +7: iteration 11820/ 21553 | consumed samples: 3025920 | consumed tokens: 6197084160 | elapsed time per iteration (s): 0.30 | learning rate: 9.764E-05 | global batch size: 256 | lm loss: 2.597133E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.558 | TFLOPs: 30.02 | +7: iteration 11830/ 21553 | consumed samples: 3028480 | consumed tokens: 6202327040 | elapsed time per iteration (s): 0.30 | learning rate: 9.751E-05 | global batch size: 256 | lm loss: 2.535968E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.935 | TFLOPs: 29.75 | +7: iteration 11840/ 21553 | consumed samples: 3031040 | consumed tokens: 6207569920 | elapsed time per iteration (s): 0.30 | learning rate: 9.738E-05 | global batch size: 256 | lm loss: 2.580954E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.967 | TFLOPs: 30.00 | +7: iteration 11850/ 21553 | consumed samples: 3033600 | consumed tokens: 6212812800 | elapsed time per iteration (s): 0.30 | learning rate: 9.725E-05 | global batch size: 256 | lm loss: 2.613846E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.563 | TFLOPs: 29.46 | +7: iteration 11860/ 21553 | consumed samples: 3036160 | consumed tokens: 6218055680 | elapsed time per iteration (s): 0.31 | learning rate: 9.712E-05 | global batch size: 256 | lm loss: 2.542932E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.127 | TFLOPs: 29.31 | +7: iteration 11870/ 21553 | consumed samples: 3038720 | consumed tokens: 6223298560 | elapsed time per iteration (s): 0.30 | learning rate: 9.698E-05 | global batch size: 256 | lm loss: 2.595724E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.037 | TFLOPs: 29.86 | +7: iteration 11880/ 21553 | consumed samples: 3041280 | consumed tokens: 6228541440 | elapsed time per iteration (s): 0.31 | learning rate: 9.685E-05 | global batch size: 256 | lm loss: 2.590285E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.736 | TFLOPs: 28.80 | +7: iteration 11890/ 21553 | consumed samples: 3043840 | consumed tokens: 6233784320 | elapsed time per iteration (s): 0.31 | learning rate: 9.672E-05 | global batch size: 256 | lm loss: 2.607381E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.060 | TFLOPs: 28.95 | +7: iteration 11900/ 21553 | consumed samples: 3046400 | consumed tokens: 6239027200 | elapsed time per iteration (s): 0.30 | learning rate: 9.659E-05 | global batch size: 256 | lm loss: 2.565806E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.528 | TFLOPs: 29.84 | +7: iteration 11910/ 21553 | consumed samples: 3048960 | consumed tokens: 6244270080 | elapsed time per iteration (s): 0.30 | learning rate: 9.646E-05 | global batch size: 256 | lm loss: 2.622275E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.377 | TFLOPs: 29.70 | +7: iteration 11920/ 21553 | consumed samples: 3051520 | consumed tokens: 6249512960 | elapsed time per iteration (s): 0.30 | learning rate: 9.633E-05 | global batch size: 256 | lm loss: 2.620263E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.736 | TFLOPs: 29.47 | +7: iteration 11930/ 21553 | consumed samples: 3054080 | consumed tokens: 6254755840 | elapsed time per iteration (s): 0.30 | learning rate: 9.620E-05 | global batch size: 256 | lm loss: 2.615629E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.272 | TFLOPs: 29.63 | +7: iteration 11940/ 21553 | consumed samples: 3056640 | consumed tokens: 6259998720 | elapsed time per iteration (s): 0.31 | learning rate: 9.607E-05 | global batch size: 256 | lm loss: 2.613091E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.111 | TFLOPs: 29.27 | +7: iteration 11950/ 21553 | consumed samples: 3059200 | consumed tokens: 6265241600 | elapsed time per iteration (s): 0.30 | learning rate: 9.594E-05 | global batch size: 256 | lm loss: 2.552127E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.370 | TFLOPs: 29.52 | +7: iteration 11960/ 21553 | consumed samples: 3061760 | consumed tokens: 6270484480 | elapsed time per iteration (s): 0.30 | learning rate: 9.581E-05 | global batch size: 256 | lm loss: 2.594017E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.458 | TFLOPs: 29.74 | +7: iteration 11970/ 21553 | consumed samples: 3064320 | consumed tokens: 6275727360 | elapsed time per iteration (s): 0.32 | learning rate: 9.567E-05 | global batch size: 256 | lm loss: 2.589773E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.937 | TFLOPs: 28.18 | +7: iteration 11980/ 21553 | consumed samples: 3066880 | consumed tokens: 6280970240 | elapsed time per iteration (s): 0.30 | learning rate: 9.554E-05 | global batch size: 256 | lm loss: 2.621776E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.432 | TFLOPs: 29.70 | +7: iteration 11990/ 21553 | consumed samples: 3069440 | consumed tokens: 6286213120 | elapsed time per iteration (s): 0.31 | learning rate: 9.541E-05 | global batch size: 256 | lm loss: 2.592188E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.066 | TFLOPs: 29.37 | +0: [2022-11-25 18:34:00,583] [INFO] [logging.py:68:log_dist] [Rank 0] step=12000, skipped=0, lr=[9.528206376265585e-05, 9.528206376265585e-05, 9.528206376265585e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 12000/ 21553 | consumed samples: 3072000 | consumed tokens: 6291456000 | elapsed time per iteration (s): 0.30 | learning rate: 9.528E-05 | global batch size: 256 | lm loss: 2.618328E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.957 | TFLOPs: 29.93 | +0: steps: 12000 loss: 2.6764 iter time (s): 0.303 samples/sec: 845.270 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 12000 | lm loss value: 2.550534E+00 | lm loss PPL: 1.281394E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 12000 to checkpoints_146m +0: [2022-11-25 18:34:00,705] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step12000 is begin to save! +0: [2022-11-25 18:34:00,709] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:34:00,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:34:00,792] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:34:00,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:34:00,807] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:34:00,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:34:00,822] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:34:00,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:34:00,837] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:34:00,851] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:34:00,851] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:34:00,866] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:34:00,866] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:34:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:34:00,881] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:34:00,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:34:00,896] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:34:00,910] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:34:00,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:34:00,925] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:34:00,926] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:34:00,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:34:00,940] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:34:00,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:34:00,955] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:34:00,969] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:34:00,969] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:34:00,984] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:34:00,984] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:34:00,998] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:34:00,998] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:34:01,013] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:34:01,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:34:01,014] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:34:01,015] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step12000/mp_rank_00_model_states.pt +0: [2022-11-25 18:34:01,015] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:34:01,017] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:34:01,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:34:01,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,066] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:34:01,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2022-11-25 18:34:01,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +7: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2022-11-25 18:34:01,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:34:01,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:34:01,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2022-11-25 18:34:01,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:34:01,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,079] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2022-11-25 18:34:01,079] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:34:01,079] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,083] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,083] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:34:01,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:34:01,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2022-11-25 18:34:01,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,086] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,086] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:34:01,086] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,086] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,086] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,086] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2022-11-25 18:34:01,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:34:01,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:34:01,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2022-11-25 18:34:01,097] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:34:01,097] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: successfully saved checkpoint at iteration 12000 to checkpoints_146m +7: time (ms) | save-checkpoint: 398.15 +7: iteration 12010/ 21553 | consumed samples: 3074560 | consumed tokens: 6296698880 | elapsed time per iteration (s): 0.37 | learning rate: 9.515E-05 | global batch size: 256 | lm loss: 2.590589E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 686.895 | TFLOPs: 24.05 | +7: iteration 12020/ 21553 | consumed samples: 3077120 | consumed tokens: 6301941760 | elapsed time per iteration (s): 0.30 | learning rate: 9.502E-05 | global batch size: 256 | lm loss: 2.551457E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.749 | TFLOPs: 29.99 | +7: iteration 12030/ 21553 | consumed samples: 3079680 | consumed tokens: 6307184640 | elapsed time per iteration (s): 0.31 | learning rate: 9.489E-05 | global batch size: 256 | lm loss: 2.566990E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.274 | TFLOPs: 28.89 | +7: iteration 12040/ 21553 | consumed samples: 3082240 | consumed tokens: 6312427520 | elapsed time per iteration (s): 0.30 | learning rate: 9.476E-05 | global batch size: 256 | lm loss: 2.586736E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.303 | TFLOPs: 29.56 | +7: iteration 12050/ 21553 | consumed samples: 3084800 | consumed tokens: 6317670400 | elapsed time per iteration (s): 0.30 | learning rate: 9.463E-05 | global batch size: 256 | lm loss: 2.565861E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.454 | TFLOPs: 29.74 | +7: iteration 12060/ 21553 | consumed samples: 3087360 | consumed tokens: 6322913280 | elapsed time per iteration (s): 0.30 | learning rate: 9.450E-05 | global batch size: 256 | lm loss: 2.573514E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.142 | TFLOPs: 29.87 | +7: iteration 12070/ 21553 | consumed samples: 3089920 | consumed tokens: 6328156160 | elapsed time per iteration (s): 0.31 | learning rate: 9.437E-05 | global batch size: 256 | lm loss: 2.598828E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.383 | TFLOPs: 29.07 | +7: iteration 12080/ 21553 | consumed samples: 3092480 | consumed tokens: 6333399040 | elapsed time per iteration (s): 0.31 | learning rate: 9.424E-05 | global batch size: 256 | lm loss: 2.580033E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.717 | TFLOPs: 29.33 | +7: iteration 12090/ 21553 | consumed samples: 3095040 | consumed tokens: 6338641920 | elapsed time per iteration (s): 0.30 | learning rate: 9.411E-05 | global batch size: 256 | lm loss: 2.575579E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.523 | TFLOPs: 29.70 | +7: iteration 12100/ 21553 | consumed samples: 3097600 | consumed tokens: 6343884800 | elapsed time per iteration (s): 0.30 | learning rate: 9.398E-05 | global batch size: 256 | lm loss: 2.571180E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.663 | TFLOPs: 29.64 | +7: iteration 12110/ 21553 | consumed samples: 3100160 | consumed tokens: 6349127680 | elapsed time per iteration (s): 0.30 | learning rate: 9.385E-05 | global batch size: 256 | lm loss: 2.545341E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.499 | TFLOPs: 29.67 | +7: iteration 12120/ 21553 | consumed samples: 3102720 | consumed tokens: 6354370560 | elapsed time per iteration (s): 0.31 | learning rate: 9.372E-05 | global batch size: 256 | lm loss: 2.603357E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.932 | TFLOPs: 29.12 | +7: iteration 12130/ 21553 | consumed samples: 3105280 | consumed tokens: 6359613440 | elapsed time per iteration (s): 0.31 | learning rate: 9.359E-05 | global batch size: 256 | lm loss: 2.607740E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.051 | TFLOPs: 28.50 | +7: iteration 12140/ 21553 | consumed samples: 3107840 | consumed tokens: 6364856320 | elapsed time per iteration (s): 0.30 | learning rate: 9.346E-05 | global batch size: 256 | lm loss: 2.603347E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.511 | TFLOPs: 29.46 | +7: iteration 12150/ 21553 | consumed samples: 3110400 | consumed tokens: 6370099200 | elapsed time per iteration (s): 0.30 | learning rate: 9.332E-05 | global batch size: 256 | lm loss: 2.611771E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.062 | TFLOPs: 29.72 | +7: iteration 12160/ 21553 | consumed samples: 3112960 | consumed tokens: 6375342080 | elapsed time per iteration (s): 0.32 | learning rate: 9.319E-05 | global batch size: 256 | lm loss: 2.572646E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.532 | TFLOPs: 28.41 | +7: iteration 12170/ 21553 | consumed samples: 3115520 | consumed tokens: 6380584960 | elapsed time per iteration (s): 0.32 | learning rate: 9.306E-05 | global batch size: 256 | lm loss: 2.531855E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.225 | TFLOPs: 28.26 | +7: iteration 12180/ 21553 | consumed samples: 3118080 | consumed tokens: 6385827840 | elapsed time per iteration (s): 0.32 | learning rate: 9.293E-05 | global batch size: 256 | lm loss: 2.572527E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 805.767 | TFLOPs: 28.21 | +7: iteration 12190/ 21553 | consumed samples: 3120640 | consumed tokens: 6391070720 | elapsed time per iteration (s): 0.31 | learning rate: 9.280E-05 | global batch size: 256 | lm loss: 2.572101E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.035 | TFLOPs: 29.13 | +7: iteration 12200/ 21553 | consumed samples: 3123200 | consumed tokens: 6396313600 | elapsed time per iteration (s): 0.31 | learning rate: 9.267E-05 | global batch size: 256 | lm loss: 2.607657E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.700 | TFLOPs: 29.29 | +7: iteration 12210/ 21553 | consumed samples: 3125760 | consumed tokens: 6401556480 | elapsed time per iteration (s): 0.30 | learning rate: 9.254E-05 | global batch size: 256 | lm loss: 2.603169E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.259 | TFLOPs: 29.84 | +7: iteration 12220/ 21553 | consumed samples: 3128320 | consumed tokens: 6406799360 | elapsed time per iteration (s): 0.30 | learning rate: 9.241E-05 | global batch size: 256 | lm loss: 2.592104E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.887 | TFLOPs: 29.75 | +7: iteration 12230/ 21553 | consumed samples: 3130880 | consumed tokens: 6412042240 | elapsed time per iteration (s): 0.30 | learning rate: 9.228E-05 | global batch size: 256 | lm loss: 2.589648E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.607 | TFLOPs: 29.67 | +7: iteration 12240/ 21553 | consumed samples: 3133440 | consumed tokens: 6417285120 | elapsed time per iteration (s): 0.31 | learning rate: 9.215E-05 | global batch size: 256 | lm loss: 2.556743E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.314 | TFLOPs: 29.07 | +7: iteration 12250/ 21553 | consumed samples: 3136000 | consumed tokens: 6422528000 | elapsed time per iteration (s): 0.31 | learning rate: 9.202E-05 | global batch size: 256 | lm loss: 2.589682E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.176 | TFLOPs: 29.38 | +7: iteration 12260/ 21553 | consumed samples: 3138560 | consumed tokens: 6427770880 | elapsed time per iteration (s): 0.30 | learning rate: 9.189E-05 | global batch size: 256 | lm loss: 2.592627E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.823 | TFLOPs: 29.85 | +7: iteration 12270/ 21553 | consumed samples: 3141120 | consumed tokens: 6433013760 | elapsed time per iteration (s): 0.30 | learning rate: 9.177E-05 | global batch size: 256 | lm loss: 2.589995E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.079 | TFLOPs: 29.79 | +7: iteration 12280/ 21553 | consumed samples: 3143680 | consumed tokens: 6438256640 | elapsed time per iteration (s): 0.30 | learning rate: 9.164E-05 | global batch size: 256 | lm loss: 2.588468E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.090 | TFLOPs: 29.51 | +7: iteration 12290/ 21553 | consumed samples: 3146240 | consumed tokens: 6443499520 | elapsed time per iteration (s): 0.30 | learning rate: 9.151E-05 | global batch size: 256 | lm loss: 2.598925E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.354 | TFLOPs: 30.08 | +7: iteration 12300/ 21553 | consumed samples: 3148800 | consumed tokens: 6448742400 | elapsed time per iteration (s): 0.30 | learning rate: 9.138E-05 | global batch size: 256 | lm loss: 2.585420E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.272 | TFLOPs: 29.80 | +7: iteration 12310/ 21553 | consumed samples: 3151360 | consumed tokens: 6453985280 | elapsed time per iteration (s): 0.30 | learning rate: 9.125E-05 | global batch size: 256 | lm loss: 2.605919E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.841 | TFLOPs: 29.54 | +7: iteration 12320/ 21553 | consumed samples: 3153920 | consumed tokens: 6459228160 | elapsed time per iteration (s): 0.30 | learning rate: 9.112E-05 | global batch size: 256 | lm loss: 2.572305E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.809 | TFLOPs: 29.64 | +7: iteration 12330/ 21553 | consumed samples: 3156480 | consumed tokens: 6464471040 | elapsed time per iteration (s): 0.30 | learning rate: 9.099E-05 | global batch size: 256 | lm loss: 2.613672E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.105 | TFLOPs: 29.76 | +7: iteration 12340/ 21553 | consumed samples: 3159040 | consumed tokens: 6469713920 | elapsed time per iteration (s): 0.30 | learning rate: 9.086E-05 | global batch size: 256 | lm loss: 2.600977E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.781 | TFLOPs: 29.96 | +7: iteration 12350/ 21553 | consumed samples: 3161600 | consumed tokens: 6474956800 | elapsed time per iteration (s): 0.30 | learning rate: 9.073E-05 | global batch size: 256 | lm loss: 2.590178E+00 | grad norm: 0.397 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.141 | TFLOPs: 29.45 | +7: iteration 12360/ 21553 | consumed samples: 3164160 | consumed tokens: 6480199680 | elapsed time per iteration (s): 0.31 | learning rate: 9.060E-05 | global batch size: 256 | lm loss: 2.592743E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.408 | TFLOPs: 29.04 | +7: iteration 12370/ 21553 | consumed samples: 3166720 | consumed tokens: 6485442560 | elapsed time per iteration (s): 0.31 | learning rate: 9.047E-05 | global batch size: 256 | lm loss: 2.582269E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.060 | TFLOPs: 28.60 | +7: iteration 12380/ 21553 | consumed samples: 3169280 | consumed tokens: 6490685440 | elapsed time per iteration (s): 0.32 | learning rate: 9.034E-05 | global batch size: 256 | lm loss: 2.595776E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.852 | TFLOPs: 28.42 | +7: iteration 12390/ 21553 | consumed samples: 3171840 | consumed tokens: 6495928320 | elapsed time per iteration (s): 0.32 | learning rate: 9.021E-05 | global batch size: 256 | lm loss: 2.558055E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.701 | TFLOPs: 27.93 | +7: iteration 12400/ 21553 | consumed samples: 3174400 | consumed tokens: 6501171200 | elapsed time per iteration (s): 0.31 | learning rate: 9.008E-05 | global batch size: 256 | lm loss: 2.613459E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.867 | TFLOPs: 29.30 | +7: iteration 12410/ 21553 | consumed samples: 3176960 | consumed tokens: 6506414080 | elapsed time per iteration (s): 0.31 | learning rate: 8.995E-05 | global batch size: 256 | lm loss: 2.618145E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.312 | TFLOPs: 29.07 | +7: iteration 12420/ 21553 | consumed samples: 3179520 | consumed tokens: 6511656960 | elapsed time per iteration (s): 0.31 | learning rate: 8.982E-05 | global batch size: 256 | lm loss: 2.583735E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.456 | TFLOPs: 29.18 | +7: iteration 12430/ 21553 | consumed samples: 3182080 | consumed tokens: 6516899840 | elapsed time per iteration (s): 0.31 | learning rate: 8.969E-05 | global batch size: 256 | lm loss: 2.592060E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.794 | TFLOPs: 28.70 | +7: iteration 12440/ 21553 | consumed samples: 3184640 | consumed tokens: 6522142720 | elapsed time per iteration (s): 0.31 | learning rate: 8.957E-05 | global batch size: 256 | lm loss: 2.566640E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.794 | TFLOPs: 29.33 | +7: iteration 12450/ 21553 | consumed samples: 3187200 | consumed tokens: 6527385600 | elapsed time per iteration (s): 0.32 | learning rate: 8.944E-05 | global batch size: 256 | lm loss: 2.588533E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 806.219 | TFLOPs: 28.22 | +7: iteration 12460/ 21553 | consumed samples: 3189760 | consumed tokens: 6532628480 | elapsed time per iteration (s): 0.39 | learning rate: 8.931E-05 | global batch size: 256 | lm loss: 2.617271E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 650.804 | TFLOPs: 22.78 | +7: iteration 12470/ 21553 | consumed samples: 3192320 | consumed tokens: 6537871360 | elapsed time per iteration (s): 0.35 | learning rate: 8.918E-05 | global batch size: 256 | lm loss: 2.588492E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 741.179 | TFLOPs: 25.95 | +7: iteration 12480/ 21553 | consumed samples: 3194880 | consumed tokens: 6543114240 | elapsed time per iteration (s): 0.32 | learning rate: 8.905E-05 | global batch size: 256 | lm loss: 2.560762E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.072 | TFLOPs: 28.04 | +7: iteration 12490/ 21553 | consumed samples: 3197440 | consumed tokens: 6548357120 | elapsed time per iteration (s): 0.31 | learning rate: 8.892E-05 | global batch size: 256 | lm loss: 2.559372E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.271 | TFLOPs: 29.28 | +7: iteration 12500/ 21553 | consumed samples: 3200000 | consumed tokens: 6553600000 | elapsed time per iteration (s): 0.60 | learning rate: 8.879E-05 | global batch size: 256 | lm loss: 2.564254E+00 | grad norm: 0.317 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 424.281 | TFLOPs: 14.85 | +7: iteration 12510/ 21553 | consumed samples: 3202560 | consumed tokens: 6558842880 | elapsed time per iteration (s): 0.31 | learning rate: 8.866E-05 | global batch size: 256 | lm loss: 2.564607E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.434 | TFLOPs: 29.32 | +7: iteration 12520/ 21553 | consumed samples: 3205120 | consumed tokens: 6564085760 | elapsed time per iteration (s): 0.30 | learning rate: 8.853E-05 | global batch size: 256 | lm loss: 2.593533E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.947 | TFLOPs: 29.47 | +7: iteration 12530/ 21553 | consumed samples: 3207680 | consumed tokens: 6569328640 | elapsed time per iteration (s): 0.30 | learning rate: 8.841E-05 | global batch size: 256 | lm loss: 2.569143E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.770 | TFLOPs: 29.47 | +7: iteration 12540/ 21553 | consumed samples: 3210240 | consumed tokens: 6574571520 | elapsed time per iteration (s): 0.31 | learning rate: 8.828E-05 | global batch size: 256 | lm loss: 2.551745E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.687 | TFLOPs: 29.29 | +7: iteration 12550/ 21553 | consumed samples: 3212800 | consumed tokens: 6579814400 | elapsed time per iteration (s): 0.31 | learning rate: 8.815E-05 | global batch size: 256 | lm loss: 2.585895E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.561 | TFLOPs: 29.18 | +7: iteration 12560/ 21553 | consumed samples: 3215360 | consumed tokens: 6585057280 | elapsed time per iteration (s): 0.30 | learning rate: 8.802E-05 | global batch size: 256 | lm loss: 2.620912E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.161 | TFLOPs: 29.52 | +7: iteration 12570/ 21553 | consumed samples: 3217920 | consumed tokens: 6590300160 | elapsed time per iteration (s): 0.30 | learning rate: 8.789E-05 | global batch size: 256 | lm loss: 2.583041E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.905 | TFLOPs: 29.82 | +7: iteration 12580/ 21553 | consumed samples: 3220480 | consumed tokens: 6595543040 | elapsed time per iteration (s): 0.31 | learning rate: 8.776E-05 | global batch size: 256 | lm loss: 2.583641E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.009 | TFLOPs: 29.27 | +7: iteration 12590/ 21553 | consumed samples: 3223040 | consumed tokens: 6600785920 | elapsed time per iteration (s): 0.30 | learning rate: 8.763E-05 | global batch size: 256 | lm loss: 2.606018E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.633 | TFLOPs: 29.39 | +7: iteration 12600/ 21553 | consumed samples: 3225600 | consumed tokens: 6606028800 | elapsed time per iteration (s): 0.31 | learning rate: 8.751E-05 | global batch size: 256 | lm loss: 2.578475E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.973 | TFLOPs: 29.23 | +7: iteration 12610/ 21553 | consumed samples: 3228160 | consumed tokens: 6611271680 | elapsed time per iteration (s): 0.30 | learning rate: 8.738E-05 | global batch size: 256 | lm loss: 2.572207E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.923 | TFLOPs: 29.75 | +7: iteration 12620/ 21553 | consumed samples: 3230720 | consumed tokens: 6616514560 | elapsed time per iteration (s): 0.30 | learning rate: 8.725E-05 | global batch size: 256 | lm loss: 2.552774E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.720 | TFLOPs: 29.64 | +7: iteration 12630/ 21553 | consumed samples: 3233280 | consumed tokens: 6621757440 | elapsed time per iteration (s): 0.32 | learning rate: 8.712E-05 | global batch size: 256 | lm loss: 2.613457E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.032 | TFLOPs: 27.97 | +7: iteration 12640/ 21553 | consumed samples: 3235840 | consumed tokens: 6627000320 | elapsed time per iteration (s): 0.31 | learning rate: 8.699E-05 | global batch size: 256 | lm loss: 2.586473E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.940 | TFLOPs: 29.23 | +7: iteration 12650/ 21553 | consumed samples: 3238400 | consumed tokens: 6632243200 | elapsed time per iteration (s): 0.39 | learning rate: 8.687E-05 | global batch size: 256 | lm loss: 2.563609E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 659.306 | TFLOPs: 23.08 | +7: iteration 12660/ 21553 | consumed samples: 3240960 | consumed tokens: 6637486080 | elapsed time per iteration (s): 0.31 | learning rate: 8.674E-05 | global batch size: 256 | lm loss: 2.576134E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.623 | TFLOPs: 29.22 | +7: iteration 12670/ 21553 | consumed samples: 3243520 | consumed tokens: 6642728960 | elapsed time per iteration (s): 0.31 | learning rate: 8.661E-05 | global batch size: 256 | lm loss: 2.572902E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.641 | TFLOPs: 28.87 | +7: iteration 12680/ 21553 | consumed samples: 3246080 | consumed tokens: 6647971840 | elapsed time per iteration (s): 0.30 | learning rate: 8.648E-05 | global batch size: 256 | lm loss: 2.607874E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.867 | TFLOPs: 29.44 | +7: iteration 12690/ 21553 | consumed samples: 3248640 | consumed tokens: 6653214720 | elapsed time per iteration (s): 0.31 | learning rate: 8.635E-05 | global batch size: 256 | lm loss: 2.566697E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.762 | TFLOPs: 29.26 | +7: iteration 12700/ 21553 | consumed samples: 3251200 | consumed tokens: 6658457600 | elapsed time per iteration (s): 0.31 | learning rate: 8.623E-05 | global batch size: 256 | lm loss: 2.581631E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.214 | TFLOPs: 29.27 | +7: iteration 12710/ 21553 | consumed samples: 3253760 | consumed tokens: 6663700480 | elapsed time per iteration (s): 0.31 | learning rate: 8.610E-05 | global batch size: 256 | lm loss: 2.567514E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.988 | TFLOPs: 29.13 | +7: iteration 12720/ 21553 | consumed samples: 3256320 | consumed tokens: 6668943360 | elapsed time per iteration (s): 0.30 | learning rate: 8.597E-05 | global batch size: 256 | lm loss: 2.562744E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.433 | TFLOPs: 29.42 | +7: iteration 12730/ 21553 | consumed samples: 3258880 | consumed tokens: 6674186240 | elapsed time per iteration (s): 0.31 | learning rate: 8.584E-05 | global batch size: 256 | lm loss: 2.555476E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.992 | TFLOPs: 28.85 | +7: iteration 12740/ 21553 | consumed samples: 3261440 | consumed tokens: 6679429120 | elapsed time per iteration (s): 0.31 | learning rate: 8.571E-05 | global batch size: 256 | lm loss: 2.567249E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.322 | TFLOPs: 29.38 | +7: iteration 12750/ 21553 | consumed samples: 3264000 | consumed tokens: 6684672000 | elapsed time per iteration (s): 0.30 | learning rate: 8.559E-05 | global batch size: 256 | lm loss: 2.605775E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.342 | TFLOPs: 29.70 | +7: iteration 12760/ 21553 | consumed samples: 3266560 | consumed tokens: 6689914880 | elapsed time per iteration (s): 0.30 | learning rate: 8.546E-05 | global batch size: 256 | lm loss: 2.578038E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.920 | TFLOPs: 29.51 | +7: iteration 12770/ 21553 | consumed samples: 3269120 | consumed tokens: 6695157760 | elapsed time per iteration (s): 0.30 | learning rate: 8.533E-05 | global batch size: 256 | lm loss: 2.565807E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.122 | TFLOPs: 29.80 | +7: iteration 12780/ 21553 | consumed samples: 3271680 | consumed tokens: 6700400640 | elapsed time per iteration (s): 0.30 | learning rate: 8.520E-05 | global batch size: 256 | lm loss: 2.581650E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.125 | TFLOPs: 29.41 | +7: iteration 12790/ 21553 | consumed samples: 3274240 | consumed tokens: 6705643520 | elapsed time per iteration (s): 0.30 | learning rate: 8.508E-05 | global batch size: 256 | lm loss: 2.567331E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.824 | TFLOPs: 29.50 | +7: iteration 12800/ 21553 | consumed samples: 3276800 | consumed tokens: 6710886400 | elapsed time per iteration (s): 0.30 | learning rate: 8.495E-05 | global batch size: 256 | lm loss: 2.590862E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.620 | TFLOPs: 29.39 | +7: iteration 12810/ 21553 | consumed samples: 3279360 | consumed tokens: 6716129280 | elapsed time per iteration (s): 0.31 | learning rate: 8.482E-05 | global batch size: 256 | lm loss: 2.577863E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.886 | TFLOPs: 29.37 | +7: iteration 12820/ 21553 | consumed samples: 3281920 | consumed tokens: 6721372160 | elapsed time per iteration (s): 0.30 | learning rate: 8.470E-05 | global batch size: 256 | lm loss: 2.572769E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.663 | TFLOPs: 29.74 | +7: iteration 12830/ 21553 | consumed samples: 3284480 | consumed tokens: 6726615040 | elapsed time per iteration (s): 0.31 | learning rate: 8.457E-05 | global batch size: 256 | lm loss: 2.577705E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.127 | TFLOPs: 29.17 | +7: iteration 12840/ 21553 | consumed samples: 3287040 | consumed tokens: 6731857920 | elapsed time per iteration (s): 0.31 | learning rate: 8.444E-05 | global batch size: 256 | lm loss: 2.572545E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.260 | TFLOPs: 29.31 | +7: iteration 12850/ 21553 | consumed samples: 3289600 | consumed tokens: 6737100800 | elapsed time per iteration (s): 0.30 | learning rate: 8.431E-05 | global batch size: 256 | lm loss: 2.531368E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.020 | TFLOPs: 29.55 | +7: iteration 12860/ 21553 | consumed samples: 3292160 | consumed tokens: 6742343680 | elapsed time per iteration (s): 0.30 | learning rate: 8.419E-05 | global batch size: 256 | lm loss: 2.566104E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.415 | TFLOPs: 29.70 | +7: iteration 12870/ 21553 | consumed samples: 3294720 | consumed tokens: 6747586560 | elapsed time per iteration (s): 0.31 | learning rate: 8.406E-05 | global batch size: 256 | lm loss: 2.562438E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.446 | TFLOPs: 29.07 | +7: iteration 12880/ 21553 | consumed samples: 3297280 | consumed tokens: 6752829440 | elapsed time per iteration (s): 0.30 | learning rate: 8.393E-05 | global batch size: 256 | lm loss: 2.587239E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.016 | TFLOPs: 29.65 | +7: iteration 12890/ 21553 | consumed samples: 3299840 | consumed tokens: 6758072320 | elapsed time per iteration (s): 0.31 | learning rate: 8.381E-05 | global batch size: 256 | lm loss: 2.585959E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.120 | TFLOPs: 28.92 | +7: iteration 12900/ 21553 | consumed samples: 3302400 | consumed tokens: 6763315200 | elapsed time per iteration (s): 0.31 | learning rate: 8.368E-05 | global batch size: 256 | lm loss: 2.580877E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.023 | TFLOPs: 29.16 | +7: iteration 12910/ 21553 | consumed samples: 3304960 | consumed tokens: 6768558080 | elapsed time per iteration (s): 0.31 | learning rate: 8.355E-05 | global batch size: 256 | lm loss: 2.576923E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.087 | TFLOPs: 28.74 | +7: iteration 12920/ 21553 | consumed samples: 3307520 | consumed tokens: 6773800960 | elapsed time per iteration (s): 0.30 | learning rate: 8.343E-05 | global batch size: 256 | lm loss: 2.617716E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.436 | TFLOPs: 29.63 | +7: iteration 12930/ 21553 | consumed samples: 3310080 | consumed tokens: 6779043840 | elapsed time per iteration (s): 0.33 | learning rate: 8.330E-05 | global batch size: 256 | lm loss: 2.587391E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 783.072 | TFLOPs: 27.41 | +7: iteration 12940/ 21553 | consumed samples: 3312640 | consumed tokens: 6784286720 | elapsed time per iteration (s): 0.31 | learning rate: 8.317E-05 | global batch size: 256 | lm loss: 2.589602E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.728 | TFLOPs: 28.66 | +7: iteration 12950/ 21553 | consumed samples: 3315200 | consumed tokens: 6789529600 | elapsed time per iteration (s): 0.31 | learning rate: 8.305E-05 | global batch size: 256 | lm loss: 2.566719E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.026 | TFLOPs: 29.37 | +7: iteration 12960/ 21553 | consumed samples: 3317760 | consumed tokens: 6794772480 | elapsed time per iteration (s): 0.32 | learning rate: 8.292E-05 | global batch size: 256 | lm loss: 2.544707E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.709 | TFLOPs: 28.28 | +7: iteration 12970/ 21553 | consumed samples: 3320320 | consumed tokens: 6800015360 | elapsed time per iteration (s): 0.31 | learning rate: 8.279E-05 | global batch size: 256 | lm loss: 2.560716E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.758 | TFLOPs: 28.98 | +7: iteration 12980/ 21553 | consumed samples: 3322880 | consumed tokens: 6805258240 | elapsed time per iteration (s): 0.31 | learning rate: 8.267E-05 | global batch size: 256 | lm loss: 2.591567E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.776 | TFLOPs: 29.12 | +7: iteration 12990/ 21553 | consumed samples: 3325440 | consumed tokens: 6810501120 | elapsed time per iteration (s): 0.30 | learning rate: 8.254E-05 | global batch size: 256 | lm loss: 2.540393E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.436 | TFLOPs: 29.67 | +7: iteration 13000/ 21553 | consumed samples: 3328000 | consumed tokens: 6815744000 | elapsed time per iteration (s): 0.30 | learning rate: 8.242E-05 | global batch size: 256 | lm loss: 2.584682E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.925 | TFLOPs: 29.51 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 13000 | lm loss value: 2.494754E+00 | lm loss PPL: 1.211876E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 13000 to checkpoints_146m +0: [2022-11-25 18:39:12,872] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step13000 is begin to save! +0: [2022-11-25 18:39:12,878] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:39:12,991] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:39:12,991] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:39:13,007] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:39:13,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:39:13,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:39:13,023] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:39:13,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:39:13,038] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:39:13,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:39:13,054] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:39:13,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:39:13,069] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:39:13,084] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:39:13,084] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:39:13,099] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:39:13,099] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:39:13,114] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:39:13,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:39:13,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:39:13,129] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:39:13,144] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:39:13,144] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:39:13,159] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:39:13,160] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:39:13,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:39:13,175] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:39:13,189] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:39:13,190] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:39:13,204] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:39:13,205] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:39:13,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:39:13,219] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:39:13,221] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:39:13,221] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step13000/mp_rank_00_model_states.pt +0: [2022-11-25 18:39:13,221] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:39:13,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:39:13,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:39:13,276] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,276] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,276] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,276] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,279] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,279] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,279] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,279] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,280] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,280] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,280] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,280] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,280] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,281] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,281] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:39:13,282] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,282] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,299] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,299] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,299] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,299] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,299] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,300] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,300] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,300] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,300] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,300] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,300] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,300] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,301] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,301] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,301] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,301] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,301] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,302] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,302] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,302] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,302] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:39:13,303] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,303] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,303] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,303] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:39:13,303] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,303] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,303] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2022-11-25 18:39:13,303] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,305] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,305] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:39:13,305] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,305] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2022-11-25 18:39:13,305] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2022-11-25 18:39:13,307] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:39:13,307] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2022-11-25 18:39:13,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2022-11-25 18:39:13,313] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2022-11-25 18:39:13,314] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2022-11-25 18:39:13,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: successfully saved checkpoint at iteration 13000 to checkpoints_146m +7: time (ms) | save-checkpoint: 508.07 +7: iteration 13010/ 21553 | consumed samples: 3330560 | consumed tokens: 6820986880 | elapsed time per iteration (s): 0.37 | learning rate: 8.229E-05 | global batch size: 256 | lm loss: 2.589902E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 700.296 | TFLOPs: 24.52 | +7: iteration 13020/ 21553 | consumed samples: 3333120 | consumed tokens: 6826229760 | elapsed time per iteration (s): 0.31 | learning rate: 8.216E-05 | global batch size: 256 | lm loss: 2.567810E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.673 | TFLOPs: 29.29 | +7: iteration 13030/ 21553 | consumed samples: 3335680 | consumed tokens: 6831472640 | elapsed time per iteration (s): 0.30 | learning rate: 8.204E-05 | global batch size: 256 | lm loss: 2.555233E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.028 | TFLOPs: 29.55 | +7: iteration 13040/ 21553 | consumed samples: 3338240 | consumed tokens: 6836715520 | elapsed time per iteration (s): 0.31 | learning rate: 8.191E-05 | global batch size: 256 | lm loss: 2.579287E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.780 | TFLOPs: 29.29 | +7: iteration 13050/ 21553 | consumed samples: 3340800 | consumed tokens: 6841958400 | elapsed time per iteration (s): 0.30 | learning rate: 8.179E-05 | global batch size: 256 | lm loss: 2.535667E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.404 | TFLOPs: 29.91 | +7: iteration 13060/ 21553 | consumed samples: 3343360 | consumed tokens: 6847201280 | elapsed time per iteration (s): 0.30 | learning rate: 8.166E-05 | global batch size: 256 | lm loss: 2.565109E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.550 | TFLOPs: 29.53 | +7: iteration 13070/ 21553 | consumed samples: 3345920 | consumed tokens: 6852444160 | elapsed time per iteration (s): 0.31 | learning rate: 8.153E-05 | global batch size: 256 | lm loss: 2.585560E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.949 | TFLOPs: 28.84 | +7: iteration 13080/ 21553 | consumed samples: 3348480 | consumed tokens: 6857687040 | elapsed time per iteration (s): 0.31 | learning rate: 8.141E-05 | global batch size: 256 | lm loss: 2.599351E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.973 | TFLOPs: 29.06 | +7: iteration 13090/ 21553 | consumed samples: 3351040 | consumed tokens: 6862929920 | elapsed time per iteration (s): 0.30 | learning rate: 8.128E-05 | global batch size: 256 | lm loss: 2.568915E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.252 | TFLOPs: 29.59 | +7: iteration 13100/ 21553 | consumed samples: 3353600 | consumed tokens: 6868172800 | elapsed time per iteration (s): 0.30 | learning rate: 8.116E-05 | global batch size: 256 | lm loss: 2.602628E+00 | grad norm: 0.261 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.432 | TFLOPs: 29.53 | +7: iteration 13110/ 21553 | consumed samples: 3356160 | consumed tokens: 6873415680 | elapsed time per iteration (s): 0.31 | learning rate: 8.103E-05 | global batch size: 256 | lm loss: 2.569120E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.291 | TFLOPs: 29.35 | +7: iteration 13120/ 21553 | consumed samples: 3358720 | consumed tokens: 6878658560 | elapsed time per iteration (s): 0.30 | learning rate: 8.091E-05 | global batch size: 256 | lm loss: 2.578152E+00 | grad norm: 0.264 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.577 | TFLOPs: 29.67 | +7: iteration 13130/ 21553 | consumed samples: 3361280 | consumed tokens: 6883901440 | elapsed time per iteration (s): 0.31 | learning rate: 8.078E-05 | global batch size: 256 | lm loss: 2.551689E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.297 | TFLOPs: 29.35 | +7: iteration 13140/ 21553 | consumed samples: 3363840 | consumed tokens: 6889144320 | elapsed time per iteration (s): 0.30 | learning rate: 8.066E-05 | global batch size: 256 | lm loss: 2.528173E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.826 | TFLOPs: 29.72 | +7: iteration 13150/ 21553 | consumed samples: 3366400 | consumed tokens: 6894387200 | elapsed time per iteration (s): 0.31 | learning rate: 8.053E-05 | global batch size: 256 | lm loss: 2.580431E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.021 | TFLOPs: 29.23 | +7: iteration 13160/ 21553 | consumed samples: 3368960 | consumed tokens: 6899630080 | elapsed time per iteration (s): 0.31 | learning rate: 8.041E-05 | global batch size: 256 | lm loss: 2.583057E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.083 | TFLOPs: 28.81 | +7: iteration 13170/ 21553 | consumed samples: 3371520 | consumed tokens: 6904872960 | elapsed time per iteration (s): 0.31 | learning rate: 8.028E-05 | global batch size: 256 | lm loss: 2.573564E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.278 | TFLOPs: 29.17 | +7: iteration 13180/ 21553 | consumed samples: 3374080 | consumed tokens: 6910115840 | elapsed time per iteration (s): 0.31 | learning rate: 8.016E-05 | global batch size: 256 | lm loss: 2.539348E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.837 | TFLOPs: 29.19 | +7: iteration 13190/ 21553 | consumed samples: 3376640 | consumed tokens: 6915358720 | elapsed time per iteration (s): 0.30 | learning rate: 8.003E-05 | global batch size: 256 | lm loss: 2.583123E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.459 | TFLOPs: 29.63 | +7: iteration 13200/ 21553 | consumed samples: 3379200 | consumed tokens: 6920601600 | elapsed time per iteration (s): 0.30 | learning rate: 7.991E-05 | global batch size: 256 | lm loss: 2.570624E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.726 | TFLOPs: 29.75 | +7: iteration 13210/ 21553 | consumed samples: 3381760 | consumed tokens: 6925844480 | elapsed time per iteration (s): 0.31 | learning rate: 7.978E-05 | global batch size: 256 | lm loss: 2.537340E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.518 | TFLOPs: 29.07 | +7: iteration 13220/ 21553 | consumed samples: 3384320 | consumed tokens: 6931087360 | elapsed time per iteration (s): 0.30 | learning rate: 7.966E-05 | global batch size: 256 | lm loss: 2.592156E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.769 | TFLOPs: 29.50 | +7: iteration 13230/ 21553 | consumed samples: 3386880 | consumed tokens: 6936330240 | elapsed time per iteration (s): 0.31 | learning rate: 7.953E-05 | global batch size: 256 | lm loss: 2.581315E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.905 | TFLOPs: 28.63 | +7: iteration 13240/ 21553 | consumed samples: 3389440 | consumed tokens: 6941573120 | elapsed time per iteration (s): 0.31 | learning rate: 7.941E-05 | global batch size: 256 | lm loss: 2.575110E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.701 | TFLOPs: 28.98 | +7: iteration 13250/ 21553 | consumed samples: 3392000 | consumed tokens: 6946816000 | elapsed time per iteration (s): 0.30 | learning rate: 7.928E-05 | global batch size: 256 | lm loss: 2.579580E+00 | grad norm: 0.257 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.478 | TFLOPs: 29.39 | +7: iteration 13260/ 21553 | consumed samples: 3394560 | consumed tokens: 6952058880 | elapsed time per iteration (s): 0.31 | learning rate: 7.916E-05 | global batch size: 256 | lm loss: 2.590501E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.147 | TFLOPs: 29.24 | +7: iteration 13270/ 21553 | consumed samples: 3397120 | consumed tokens: 6957301760 | elapsed time per iteration (s): 0.31 | learning rate: 7.903E-05 | global batch size: 256 | lm loss: 2.588558E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.551 | TFLOPs: 29.32 | +7: iteration 13280/ 21553 | consumed samples: 3399680 | consumed tokens: 6962544640 | elapsed time per iteration (s): 0.31 | learning rate: 7.891E-05 | global batch size: 256 | lm loss: 2.542483E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.962 | TFLOPs: 29.09 | +7: iteration 13290/ 21553 | consumed samples: 3402240 | consumed tokens: 6967787520 | elapsed time per iteration (s): 0.30 | learning rate: 7.878E-05 | global batch size: 256 | lm loss: 2.593436E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.074 | TFLOPs: 29.41 | +7: iteration 13300/ 21553 | consumed samples: 3404800 | consumed tokens: 6973030400 | elapsed time per iteration (s): 0.31 | learning rate: 7.866E-05 | global batch size: 256 | lm loss: 2.582018E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.439 | TFLOPs: 28.72 | +7: iteration 13310/ 21553 | consumed samples: 3407360 | consumed tokens: 6978273280 | elapsed time per iteration (s): 0.31 | learning rate: 7.854E-05 | global batch size: 256 | lm loss: 2.584920E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.925 | TFLOPs: 29.16 | +7: iteration 13320/ 21553 | consumed samples: 3409920 | consumed tokens: 6983516160 | elapsed time per iteration (s): 0.31 | learning rate: 7.841E-05 | global batch size: 256 | lm loss: 2.567162E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.391 | TFLOPs: 28.89 | +7: iteration 13330/ 21553 | consumed samples: 3412480 | consumed tokens: 6988759040 | elapsed time per iteration (s): 0.31 | learning rate: 7.829E-05 | global batch size: 256 | lm loss: 2.562718E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.000 | TFLOPs: 29.09 | +7: iteration 13340/ 21553 | consumed samples: 3415040 | consumed tokens: 6994001920 | elapsed time per iteration (s): 0.31 | learning rate: 7.816E-05 | global batch size: 256 | lm loss: 2.568295E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.050 | TFLOPs: 28.60 | +7: iteration 13350/ 21553 | consumed samples: 3417600 | consumed tokens: 6999244800 | elapsed time per iteration (s): 0.31 | learning rate: 7.804E-05 | global batch size: 256 | lm loss: 2.561988E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.054 | TFLOPs: 29.37 | +7: iteration 13360/ 21553 | consumed samples: 3420160 | consumed tokens: 7004487680 | elapsed time per iteration (s): 0.31 | learning rate: 7.792E-05 | global batch size: 256 | lm loss: 2.573067E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.516 | TFLOPs: 29.07 | +7: iteration 13370/ 21553 | consumed samples: 3422720 | consumed tokens: 7009730560 | elapsed time per iteration (s): 0.30 | learning rate: 7.779E-05 | global batch size: 256 | lm loss: 2.571330E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.451 | TFLOPs: 29.88 | +7: iteration 13380/ 21553 | consumed samples: 3425280 | consumed tokens: 7014973440 | elapsed time per iteration (s): 0.32 | learning rate: 7.767E-05 | global batch size: 256 | lm loss: 2.563669E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 793.902 | TFLOPs: 27.79 | +7: iteration 13390/ 21553 | consumed samples: 3427840 | consumed tokens: 7020216320 | elapsed time per iteration (s): 0.32 | learning rate: 7.754E-05 | global batch size: 256 | lm loss: 2.573513E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.619 | TFLOPs: 28.31 | +7: iteration 13400/ 21553 | consumed samples: 3430400 | consumed tokens: 7025459200 | elapsed time per iteration (s): 0.30 | learning rate: 7.742E-05 | global batch size: 256 | lm loss: 2.573842E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.393 | TFLOPs: 29.45 | +7: iteration 13410/ 21553 | consumed samples: 3432960 | consumed tokens: 7030702080 | elapsed time per iteration (s): 0.31 | learning rate: 7.730E-05 | global batch size: 256 | lm loss: 2.600037E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.425 | TFLOPs: 28.97 | +7: iteration 13420/ 21553 | consumed samples: 3435520 | consumed tokens: 7035944960 | elapsed time per iteration (s): 0.31 | learning rate: 7.717E-05 | global batch size: 256 | lm loss: 2.611860E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.048 | TFLOPs: 28.46 | +7: iteration 13430/ 21553 | consumed samples: 3438080 | consumed tokens: 7041187840 | elapsed time per iteration (s): 0.30 | learning rate: 7.705E-05 | global batch size: 256 | lm loss: 2.554832E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.687 | TFLOPs: 29.47 | +7: iteration 13440/ 21553 | consumed samples: 3440640 | consumed tokens: 7046430720 | elapsed time per iteration (s): 0.30 | learning rate: 7.693E-05 | global batch size: 256 | lm loss: 2.565869E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.057 | TFLOPs: 29.58 | +7: iteration 13450/ 21553 | consumed samples: 3443200 | consumed tokens: 7051673600 | elapsed time per iteration (s): 0.30 | learning rate: 7.680E-05 | global batch size: 256 | lm loss: 2.582751E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.526 | TFLOPs: 29.56 | +7: iteration 13460/ 21553 | consumed samples: 3445760 | consumed tokens: 7056916480 | elapsed time per iteration (s): 0.31 | learning rate: 7.668E-05 | global batch size: 256 | lm loss: 2.595103E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.388 | TFLOPs: 29.35 | +7: iteration 13470/ 21553 | consumed samples: 3448320 | consumed tokens: 7062159360 | elapsed time per iteration (s): 0.31 | learning rate: 7.656E-05 | global batch size: 256 | lm loss: 2.540635E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.704 | TFLOPs: 29.36 | +7: iteration 13480/ 21553 | consumed samples: 3450880 | consumed tokens: 7067402240 | elapsed time per iteration (s): 0.30 | learning rate: 7.644E-05 | global batch size: 256 | lm loss: 2.562890E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.873 | TFLOPs: 29.79 | +7: iteration 13490/ 21553 | consumed samples: 3453440 | consumed tokens: 7072645120 | elapsed time per iteration (s): 0.30 | learning rate: 7.631E-05 | global batch size: 256 | lm loss: 2.552484E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.531 | TFLOPs: 29.53 | +7: iteration 13500/ 21553 | consumed samples: 3456000 | consumed tokens: 7077888000 | elapsed time per iteration (s): 0.31 | learning rate: 7.619E-05 | global batch size: 256 | lm loss: 2.544411E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.108 | TFLOPs: 29.02 | +7: iteration 13510/ 21553 | consumed samples: 3458560 | consumed tokens: 7083130880 | elapsed time per iteration (s): 0.30 | learning rate: 7.607E-05 | global batch size: 256 | lm loss: 2.587000E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.114 | TFLOPs: 29.90 | +7: iteration 13520/ 21553 | consumed samples: 3461120 | consumed tokens: 7088373760 | elapsed time per iteration (s): 0.31 | learning rate: 7.594E-05 | global batch size: 256 | lm loss: 2.589162E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.827 | TFLOPs: 29.15 | +7: iteration 13530/ 21553 | consumed samples: 3463680 | consumed tokens: 7093616640 | elapsed time per iteration (s): 0.30 | learning rate: 7.582E-05 | global batch size: 256 | lm loss: 2.555357E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.895 | TFLOPs: 29.44 | +7: iteration 13540/ 21553 | consumed samples: 3466240 | consumed tokens: 7098859520 | elapsed time per iteration (s): 0.31 | learning rate: 7.570E-05 | global batch size: 256 | lm loss: 2.595598E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.590 | TFLOPs: 29.36 | +7: iteration 13550/ 21553 | consumed samples: 3468800 | consumed tokens: 7104102400 | elapsed time per iteration (s): 0.30 | learning rate: 7.558E-05 | global batch size: 256 | lm loss: 2.575375E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.544 | TFLOPs: 29.74 | +7: iteration 13560/ 21553 | consumed samples: 3471360 | consumed tokens: 7109345280 | elapsed time per iteration (s): 0.31 | learning rate: 7.545E-05 | global batch size: 256 | lm loss: 2.562574E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.530 | TFLOPs: 29.00 | +7: iteration 13570/ 21553 | consumed samples: 3473920 | consumed tokens: 7114588160 | elapsed time per iteration (s): 0.31 | learning rate: 7.533E-05 | global batch size: 256 | lm loss: 2.583165E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.833 | TFLOPs: 28.60 | +7: iteration 13580/ 21553 | consumed samples: 3476480 | consumed tokens: 7119831040 | elapsed time per iteration (s): 0.30 | learning rate: 7.521E-05 | global batch size: 256 | lm loss: 2.570548E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.785 | TFLOPs: 29.40 | +7: iteration 13590/ 21553 | consumed samples: 3479040 | consumed tokens: 7125073920 | elapsed time per iteration (s): 0.30 | learning rate: 7.509E-05 | global batch size: 256 | lm loss: 2.556681E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.692 | TFLOPs: 29.61 | +7: iteration 13600/ 21553 | consumed samples: 3481600 | consumed tokens: 7130316800 | elapsed time per iteration (s): 0.31 | learning rate: 7.497E-05 | global batch size: 256 | lm loss: 2.613603E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.604 | TFLOPs: 29.29 | +7: iteration 13610/ 21553 | consumed samples: 3484160 | consumed tokens: 7135559680 | elapsed time per iteration (s): 0.30 | learning rate: 7.484E-05 | global batch size: 256 | lm loss: 2.575955E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.627 | TFLOPs: 29.50 | +7: iteration 13620/ 21553 | consumed samples: 3486720 | consumed tokens: 7140802560 | elapsed time per iteration (s): 0.30 | learning rate: 7.472E-05 | global batch size: 256 | lm loss: 2.553546E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.742 | TFLOPs: 29.40 | +7: iteration 13630/ 21553 | consumed samples: 3489280 | consumed tokens: 7146045440 | elapsed time per iteration (s): 0.30 | learning rate: 7.460E-05 | global batch size: 256 | lm loss: 2.576002E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.183 | TFLOPs: 29.62 | +7: iteration 13640/ 21553 | consumed samples: 3491840 | consumed tokens: 7151288320 | elapsed time per iteration (s): 0.30 | learning rate: 7.448E-05 | global batch size: 256 | lm loss: 2.550365E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.235 | TFLOPs: 29.41 | +7: iteration 13650/ 21553 | consumed samples: 3494400 | consumed tokens: 7156531200 | elapsed time per iteration (s): 0.31 | learning rate: 7.436E-05 | global batch size: 256 | lm loss: 2.586444E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.577 | TFLOPs: 28.76 | +7: iteration 13660/ 21553 | consumed samples: 3496960 | consumed tokens: 7161774080 | elapsed time per iteration (s): 0.30 | learning rate: 7.423E-05 | global batch size: 256 | lm loss: 2.565073E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.816 | TFLOPs: 29.54 | +7: iteration 13670/ 21553 | consumed samples: 3499520 | consumed tokens: 7167016960 | elapsed time per iteration (s): 0.31 | learning rate: 7.411E-05 | global batch size: 256 | lm loss: 2.594235E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.238 | TFLOPs: 29.17 | +7: iteration 13680/ 21553 | consumed samples: 3502080 | consumed tokens: 7172259840 | elapsed time per iteration (s): 0.31 | learning rate: 7.399E-05 | global batch size: 256 | lm loss: 2.555797E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.870 | TFLOPs: 28.95 | +7: iteration 13690/ 21553 | consumed samples: 3504640 | consumed tokens: 7177502720 | elapsed time per iteration (s): 0.30 | learning rate: 7.387E-05 | global batch size: 256 | lm loss: 2.582116E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.198 | TFLOPs: 29.73 | +7: iteration 13700/ 21553 | consumed samples: 3507200 | consumed tokens: 7182745600 | elapsed time per iteration (s): 0.32 | learning rate: 7.375E-05 | global batch size: 256 | lm loss: 2.601101E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.299 | TFLOPs: 28.16 | +7: iteration 13710/ 21553 | consumed samples: 3509760 | consumed tokens: 7187988480 | elapsed time per iteration (s): 0.30 | learning rate: 7.363E-05 | global batch size: 256 | lm loss: 2.565965E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.072 | TFLOPs: 29.69 | +7: iteration 13720/ 21553 | consumed samples: 3512320 | consumed tokens: 7193231360 | elapsed time per iteration (s): 0.33 | learning rate: 7.351E-05 | global batch size: 256 | lm loss: 2.543264E+00 | grad norm: 0.264 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 781.537 | TFLOPs: 27.36 | +7: iteration 13730/ 21553 | consumed samples: 3514880 | consumed tokens: 7198474240 | elapsed time per iteration (s): 0.30 | learning rate: 7.339E-05 | global batch size: 256 | lm loss: 2.577213E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.351 | TFLOPs: 29.42 | +7: iteration 13740/ 21553 | consumed samples: 3517440 | consumed tokens: 7203717120 | elapsed time per iteration (s): 0.30 | learning rate: 7.326E-05 | global batch size: 256 | lm loss: 2.582370E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.176 | TFLOPs: 29.90 | +7: iteration 13750/ 21553 | consumed samples: 3520000 | consumed tokens: 7208960000 | elapsed time per iteration (s): 0.31 | learning rate: 7.314E-05 | global batch size: 256 | lm loss: 2.578203E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.358 | TFLOPs: 28.58 | +7: iteration 13760/ 21553 | consumed samples: 3522560 | consumed tokens: 7214202880 | elapsed time per iteration (s): 0.30 | learning rate: 7.302E-05 | global batch size: 256 | lm loss: 2.544858E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.367 | TFLOPs: 29.45 | +7: iteration 13770/ 21553 | consumed samples: 3525120 | consumed tokens: 7219445760 | elapsed time per iteration (s): 0.30 | learning rate: 7.290E-05 | global batch size: 256 | lm loss: 2.610441E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.931 | TFLOPs: 29.51 | +7: iteration 13780/ 21553 | consumed samples: 3527680 | consumed tokens: 7224688640 | elapsed time per iteration (s): 0.30 | learning rate: 7.278E-05 | global batch size: 256 | lm loss: 2.548929E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.558 | TFLOPs: 29.43 | +7: iteration 13790/ 21553 | consumed samples: 3530240 | consumed tokens: 7229931520 | elapsed time per iteration (s): 0.31 | learning rate: 7.266E-05 | global batch size: 256 | lm loss: 2.581812E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.001 | TFLOPs: 28.57 | +7: iteration 13800/ 21553 | consumed samples: 3532800 | consumed tokens: 7235174400 | elapsed time per iteration (s): 0.30 | learning rate: 7.254E-05 | global batch size: 256 | lm loss: 2.586665E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.079 | TFLOPs: 29.72 | +7: iteration 13810/ 21553 | consumed samples: 3535360 | consumed tokens: 7240417280 | elapsed time per iteration (s): 0.31 | learning rate: 7.242E-05 | global batch size: 256 | lm loss: 2.547916E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.221 | TFLOPs: 28.96 | +7: iteration 13820/ 21553 | consumed samples: 3537920 | consumed tokens: 7245660160 | elapsed time per iteration (s): 0.31 | learning rate: 7.230E-05 | global batch size: 256 | lm loss: 2.564775E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.144 | TFLOPs: 29.31 | +7: iteration 13830/ 21553 | consumed samples: 3540480 | consumed tokens: 7250903040 | elapsed time per iteration (s): 0.30 | learning rate: 7.218E-05 | global batch size: 256 | lm loss: 2.588600E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.825 | TFLOPs: 29.64 | +7: iteration 13840/ 21553 | consumed samples: 3543040 | consumed tokens: 7256145920 | elapsed time per iteration (s): 0.30 | learning rate: 7.206E-05 | global batch size: 256 | lm loss: 2.554181E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.826 | TFLOPs: 29.68 | +7: iteration 13850/ 21553 | consumed samples: 3545600 | consumed tokens: 7261388800 | elapsed time per iteration (s): 0.30 | learning rate: 7.194E-05 | global batch size: 256 | lm loss: 2.578056E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.013 | TFLOPs: 29.55 | +7: iteration 13860/ 21553 | consumed samples: 3548160 | consumed tokens: 7266631680 | elapsed time per iteration (s): 0.30 | learning rate: 7.182E-05 | global batch size: 256 | lm loss: 2.613621E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.081 | TFLOPs: 29.62 | +7: iteration 13870/ 21553 | consumed samples: 3550720 | consumed tokens: 7271874560 | elapsed time per iteration (s): 0.31 | learning rate: 7.170E-05 | global batch size: 256 | lm loss: 2.579944E+00 | grad norm: 0.326 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.446 | TFLOPs: 29.18 | +7: iteration 13880/ 21553 | consumed samples: 3553280 | consumed tokens: 7277117440 | elapsed time per iteration (s): 0.31 | learning rate: 7.158E-05 | global batch size: 256 | lm loss: 2.533319E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.855 | TFLOPs: 29.30 | +7: iteration 13890/ 21553 | consumed samples: 3555840 | consumed tokens: 7282360320 | elapsed time per iteration (s): 0.30 | learning rate: 7.146E-05 | global batch size: 256 | lm loss: 2.562384E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.345 | TFLOPs: 29.42 | +7: iteration 13900/ 21553 | consumed samples: 3558400 | consumed tokens: 7287603200 | elapsed time per iteration (s): 0.30 | learning rate: 7.134E-05 | global batch size: 256 | lm loss: 2.543722E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.442 | TFLOPs: 29.67 | +7: iteration 13910/ 21553 | consumed samples: 3560960 | consumed tokens: 7292846080 | elapsed time per iteration (s): 0.32 | learning rate: 7.122E-05 | global batch size: 256 | lm loss: 2.582654E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 800.886 | TFLOPs: 28.04 | +7: iteration 13920/ 21553 | consumed samples: 3563520 | consumed tokens: 7298088960 | elapsed time per iteration (s): 0.30 | learning rate: 7.110E-05 | global batch size: 256 | lm loss: 2.552744E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.123 | TFLOPs: 30.01 | +7: iteration 13930/ 21553 | consumed samples: 3566080 | consumed tokens: 7303331840 | elapsed time per iteration (s): 0.30 | learning rate: 7.098E-05 | global batch size: 256 | lm loss: 2.565018E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.186 | TFLOPs: 29.73 | +7: iteration 13940/ 21553 | consumed samples: 3568640 | consumed tokens: 7308574720 | elapsed time per iteration (s): 0.31 | learning rate: 7.086E-05 | global batch size: 256 | lm loss: 2.592727E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.843 | TFLOPs: 29.30 | +7: iteration 13950/ 21553 | consumed samples: 3571200 | consumed tokens: 7313817600 | elapsed time per iteration (s): 0.31 | learning rate: 7.074E-05 | global batch size: 256 | lm loss: 2.584693E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.173 | TFLOPs: 29.34 | +7: iteration 13960/ 21553 | consumed samples: 3573760 | consumed tokens: 7319060480 | elapsed time per iteration (s): 0.31 | learning rate: 7.062E-05 | global batch size: 256 | lm loss: 2.570567E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.827 | TFLOPs: 29.22 | +7: iteration 13970/ 21553 | consumed samples: 3576320 | consumed tokens: 7324303360 | elapsed time per iteration (s): 0.30 | learning rate: 7.050E-05 | global batch size: 256 | lm loss: 2.554182E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.052 | TFLOPs: 29.83 | +7: iteration 13980/ 21553 | consumed samples: 3578880 | consumed tokens: 7329546240 | elapsed time per iteration (s): 0.30 | learning rate: 7.038E-05 | global batch size: 256 | lm loss: 2.571668E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.215 | TFLOPs: 29.55 | +7: iteration 13990/ 21553 | consumed samples: 3581440 | consumed tokens: 7334789120 | elapsed time per iteration (s): 0.30 | learning rate: 7.027E-05 | global batch size: 256 | lm loss: 2.535171E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.586 | TFLOPs: 29.99 | +0: [2022-11-25 18:44:19,514] [INFO] [logging.py:68:log_dist] [Rank 0] step=14000, skipped=0, lr=[7.014654688611906e-05, 7.014654688611906e-05, 7.014654688611906e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 14000/ 21553 | consumed samples: 3584000 | consumed tokens: 7340032000 | elapsed time per iteration (s): 0.31 | learning rate: 7.015E-05 | global batch size: 256 | lm loss: 2.542315E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.947 | TFLOPs: 28.88 | +0: steps: 14000 loss: 2.5635 iter time (s): 0.306 samples/sec: 837.003 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 14000 | lm loss value: 2.472280E+00 | lm loss PPL: 1.184944E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 14000 to checkpoints_146m +0: [2022-11-25 18:44:19,647] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step14000 is begin to save! +0: [2022-11-25 18:44:19,650] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:44:19,736] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:44:19,737] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:44:19,752] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:44:19,752] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:44:19,767] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:44:19,767] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:44:19,783] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:44:19,783] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:44:19,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:44:19,798] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:44:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:44:19,813] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:44:19,827] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:44:19,828] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:44:19,842] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:44:19,843] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:44:19,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:44:19,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:44:19,872] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:44:19,872] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:44:19,886] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:44:19,887] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:44:19,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:44:19,901] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:44:19,916] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:44:19,916] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:44:19,930] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:44:19,931] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:44:19,945] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:44:19,945] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:44:19,960] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:44:19,960] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:44:19,961] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:44:19,962] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step14000/mp_rank_00_model_states.pt +0: [2022-11-25 18:44:19,962] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:44:19,964] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:44:19,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:44:20,016] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,016] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,016] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,016] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,016] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,016] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,016] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,017] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,017] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,017] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,017] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,018] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,018] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,018] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,018] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,018] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,018] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,018] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,019] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,019] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,020] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,020] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,020] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,020] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,020] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,020] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,021] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,021] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,021] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,021] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,021] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,022] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,022] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,022] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,022] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,022] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,022] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,023] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:44:20,024] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:44:20,024] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2022-11-25 18:44:20,025] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:44:20,025] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:44:20,025] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,027] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:44:20,027] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,027] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,027] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:44:20,028] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,028] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,028] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,028] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2022-11-25 18:44:20,029] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:44:20,029] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:44:20,029] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2022-11-25 18:44:20,032] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:44:20,032] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2022-11-25 18:44:20,033] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:44:20,033] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:44:20,034] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2022-11-25 18:44:20,038] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2022-11-25 18:44:20,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: successfully saved checkpoint at iteration 14000 to checkpoints_146m +7: time (ms) | save-checkpoint: 413.69 +7: iteration 14010/ 21553 | consumed samples: 3586560 | consumed tokens: 7345274880 | elapsed time per iteration (s): 0.35 | learning rate: 7.003E-05 | global batch size: 256 | lm loss: 2.605725E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 721.748 | TFLOPs: 25.27 | +7: iteration 14020/ 21553 | consumed samples: 3589120 | consumed tokens: 7350517760 | elapsed time per iteration (s): 0.31 | learning rate: 6.991E-05 | global batch size: 256 | lm loss: 2.577376E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.544 | TFLOPs: 29.04 | +7: iteration 14030/ 21553 | consumed samples: 3591680 | consumed tokens: 7355760640 | elapsed time per iteration (s): 0.30 | learning rate: 6.979E-05 | global batch size: 256 | lm loss: 2.587173E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.760 | TFLOPs: 29.54 | +7: iteration 14040/ 21553 | consumed samples: 3594240 | consumed tokens: 7361003520 | elapsed time per iteration (s): 0.30 | learning rate: 6.967E-05 | global batch size: 256 | lm loss: 2.556122E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.340 | TFLOPs: 29.70 | +7: iteration 14050/ 21553 | consumed samples: 3596800 | consumed tokens: 7366246400 | elapsed time per iteration (s): 0.30 | learning rate: 6.955E-05 | global batch size: 256 | lm loss: 2.542619E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.521 | TFLOPs: 29.49 | +7: iteration 14060/ 21553 | consumed samples: 3599360 | consumed tokens: 7371489280 | elapsed time per iteration (s): 0.31 | learning rate: 6.944E-05 | global batch size: 256 | lm loss: 2.550421E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.360 | TFLOPs: 28.93 | +7: iteration 14070/ 21553 | consumed samples: 3601920 | consumed tokens: 7376732160 | elapsed time per iteration (s): 0.30 | learning rate: 6.932E-05 | global batch size: 256 | lm loss: 2.529054E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.481 | TFLOPs: 29.63 | +7: iteration 14080/ 21553 | consumed samples: 3604480 | consumed tokens: 7381975040 | elapsed time per iteration (s): 0.30 | learning rate: 6.920E-05 | global batch size: 256 | lm loss: 2.577988E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.375 | TFLOPs: 29.80 | +7: iteration 14090/ 21553 | consumed samples: 3607040 | consumed tokens: 7387217920 | elapsed time per iteration (s): 0.31 | learning rate: 6.908E-05 | global batch size: 256 | lm loss: 2.537414E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.864 | TFLOPs: 29.16 | +7: iteration 14100/ 21553 | consumed samples: 3609600 | consumed tokens: 7392460800 | elapsed time per iteration (s): 0.30 | learning rate: 6.896E-05 | global batch size: 256 | lm loss: 2.570661E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.954 | TFLOPs: 29.65 | +7: iteration 14110/ 21553 | consumed samples: 3612160 | consumed tokens: 7397703680 | elapsed time per iteration (s): 0.31 | learning rate: 6.884E-05 | global batch size: 256 | lm loss: 2.573512E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.033 | TFLOPs: 28.95 | +7: iteration 14120/ 21553 | consumed samples: 3614720 | consumed tokens: 7402946560 | elapsed time per iteration (s): 0.31 | learning rate: 6.873E-05 | global batch size: 256 | lm loss: 2.573488E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.141 | TFLOPs: 29.06 | +7: iteration 14130/ 21553 | consumed samples: 3617280 | consumed tokens: 7408189440 | elapsed time per iteration (s): 0.31 | learning rate: 6.861E-05 | global batch size: 256 | lm loss: 2.590846E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.153 | TFLOPs: 29.13 | +7: iteration 14140/ 21553 | consumed samples: 3619840 | consumed tokens: 7413432320 | elapsed time per iteration (s): 0.30 | learning rate: 6.849E-05 | global batch size: 256 | lm loss: 2.547353E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.110 | TFLOPs: 29.44 | +7: iteration 14150/ 21553 | consumed samples: 3622400 | consumed tokens: 7418675200 | elapsed time per iteration (s): 0.30 | learning rate: 6.837E-05 | global batch size: 256 | lm loss: 2.586726E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.086 | TFLOPs: 29.93 | +7: iteration 14160/ 21553 | consumed samples: 3624960 | consumed tokens: 7423918080 | elapsed time per iteration (s): 0.31 | learning rate: 6.826E-05 | global batch size: 256 | lm loss: 2.598495E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.824 | TFLOPs: 29.33 | +7: iteration 14170/ 21553 | consumed samples: 3627520 | consumed tokens: 7429160960 | elapsed time per iteration (s): 0.31 | learning rate: 6.814E-05 | global batch size: 256 | lm loss: 2.556865E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.497 | TFLOPs: 29.21 | +7: iteration 14180/ 21553 | consumed samples: 3630080 | consumed tokens: 7434403840 | elapsed time per iteration (s): 0.31 | learning rate: 6.802E-05 | global batch size: 256 | lm loss: 2.603906E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.443 | TFLOPs: 29.32 | +7: iteration 14190/ 21553 | consumed samples: 3632640 | consumed tokens: 7439646720 | elapsed time per iteration (s): 0.30 | learning rate: 6.791E-05 | global batch size: 256 | lm loss: 2.561384E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.673 | TFLOPs: 29.39 | +7: iteration 14200/ 21553 | consumed samples: 3635200 | consumed tokens: 7444889600 | elapsed time per iteration (s): 0.30 | learning rate: 6.779E-05 | global batch size: 256 | lm loss: 2.569359E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.486 | TFLOPs: 29.67 | +7: iteration 14210/ 21553 | consumed samples: 3637760 | consumed tokens: 7450132480 | elapsed time per iteration (s): 0.30 | learning rate: 6.767E-05 | global batch size: 256 | lm loss: 2.544034E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.691 | TFLOPs: 29.50 | +7: iteration 14220/ 21553 | consumed samples: 3640320 | consumed tokens: 7455375360 | elapsed time per iteration (s): 0.30 | learning rate: 6.755E-05 | global batch size: 256 | lm loss: 2.578555E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.852 | TFLOPs: 29.54 | +7: iteration 14230/ 21553 | consumed samples: 3642880 | consumed tokens: 7460618240 | elapsed time per iteration (s): 0.30 | learning rate: 6.744E-05 | global batch size: 256 | lm loss: 2.555728E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.333 | TFLOPs: 29.80 | +7: iteration 14240/ 21553 | consumed samples: 3645440 | consumed tokens: 7465861120 | elapsed time per iteration (s): 0.30 | learning rate: 6.732E-05 | global batch size: 256 | lm loss: 2.578817E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.304 | TFLOPs: 29.87 | +7: iteration 14250/ 21553 | consumed samples: 3648000 | consumed tokens: 7471104000 | elapsed time per iteration (s): 0.30 | learning rate: 6.720E-05 | global batch size: 256 | lm loss: 2.576795E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.298 | TFLOPs: 29.70 | +7: iteration 14260/ 21553 | consumed samples: 3650560 | consumed tokens: 7476346880 | elapsed time per iteration (s): 0.30 | learning rate: 6.709E-05 | global batch size: 256 | lm loss: 2.590882E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.019 | TFLOPs: 29.62 | +7: iteration 14270/ 21553 | consumed samples: 3653120 | consumed tokens: 7481589760 | elapsed time per iteration (s): 0.31 | learning rate: 6.697E-05 | global batch size: 256 | lm loss: 2.544086E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.242 | TFLOPs: 28.85 | +7: iteration 14280/ 21553 | consumed samples: 3655680 | consumed tokens: 7486832640 | elapsed time per iteration (s): 0.30 | learning rate: 6.685E-05 | global batch size: 256 | lm loss: 2.560090E+00 | grad norm: 0.320 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.647 | TFLOPs: 29.71 | +7: iteration 14290/ 21553 | consumed samples: 3658240 | consumed tokens: 7492075520 | elapsed time per iteration (s): 0.31 | learning rate: 6.674E-05 | global batch size: 256 | lm loss: 2.576832E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.941 | TFLOPs: 29.23 | +7: iteration 14300/ 21553 | consumed samples: 3660800 | consumed tokens: 7497318400 | elapsed time per iteration (s): 0.31 | learning rate: 6.662E-05 | global batch size: 256 | lm loss: 2.514705E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.946 | TFLOPs: 28.88 | +7: iteration 14310/ 21553 | consumed samples: 3663360 | consumed tokens: 7502561280 | elapsed time per iteration (s): 0.31 | learning rate: 6.651E-05 | global batch size: 256 | lm loss: 2.523824E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.219 | TFLOPs: 29.10 | +7: iteration 14320/ 21553 | consumed samples: 3665920 | consumed tokens: 7507804160 | elapsed time per iteration (s): 0.30 | learning rate: 6.639E-05 | global batch size: 256 | lm loss: 2.528566E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.523 | TFLOPs: 29.56 | +7: iteration 14330/ 21553 | consumed samples: 3668480 | consumed tokens: 7513047040 | elapsed time per iteration (s): 0.30 | learning rate: 6.627E-05 | global batch size: 256 | lm loss: 2.542132E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.132 | TFLOPs: 29.69 | +7: iteration 14340/ 21553 | consumed samples: 3671040 | consumed tokens: 7518289920 | elapsed time per iteration (s): 0.30 | learning rate: 6.616E-05 | global batch size: 256 | lm loss: 2.563743E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.568 | TFLOPs: 29.43 | +7: iteration 14350/ 21553 | consumed samples: 3673600 | consumed tokens: 7523532800 | elapsed time per iteration (s): 0.30 | learning rate: 6.604E-05 | global batch size: 256 | lm loss: 2.589575E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.301 | TFLOPs: 29.52 | +7: iteration 14360/ 21553 | consumed samples: 3676160 | consumed tokens: 7528775680 | elapsed time per iteration (s): 0.30 | learning rate: 6.593E-05 | global batch size: 256 | lm loss: 2.594070E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.791 | TFLOPs: 29.85 | +7: iteration 14370/ 21553 | consumed samples: 3678720 | consumed tokens: 7534018560 | elapsed time per iteration (s): 0.31 | learning rate: 6.581E-05 | global batch size: 256 | lm loss: 2.592555E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.485 | TFLOPs: 28.93 | +7: iteration 14380/ 21553 | consumed samples: 3681280 | consumed tokens: 7539261440 | elapsed time per iteration (s): 0.31 | learning rate: 6.570E-05 | global batch size: 256 | lm loss: 2.544846E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.207 | TFLOPs: 28.47 | +7: iteration 14390/ 21553 | consumed samples: 3683840 | consumed tokens: 7544504320 | elapsed time per iteration (s): 0.30 | learning rate: 6.558E-05 | global batch size: 256 | lm loss: 2.572547E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.681 | TFLOPs: 29.78 | +7: iteration 14400/ 21553 | consumed samples: 3686400 | consumed tokens: 7549747200 | elapsed time per iteration (s): 0.30 | learning rate: 6.547E-05 | global batch size: 256 | lm loss: 2.519839E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.315 | TFLOPs: 29.63 | +7: iteration 14410/ 21553 | consumed samples: 3688960 | consumed tokens: 7554990080 | elapsed time per iteration (s): 0.31 | learning rate: 6.535E-05 | global batch size: 256 | lm loss: 2.522890E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.602 | TFLOPs: 29.32 | +7: iteration 14420/ 21553 | consumed samples: 3691520 | consumed tokens: 7560232960 | elapsed time per iteration (s): 0.30 | learning rate: 6.524E-05 | global batch size: 256 | lm loss: 2.555654E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.553 | TFLOPs: 29.67 | +7: iteration 14430/ 21553 | consumed samples: 3694080 | consumed tokens: 7565475840 | elapsed time per iteration (s): 0.30 | learning rate: 6.512E-05 | global batch size: 256 | lm loss: 2.565345E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.943 | TFLOPs: 29.75 | +7: iteration 14440/ 21553 | consumed samples: 3696640 | consumed tokens: 7570718720 | elapsed time per iteration (s): 0.32 | learning rate: 6.501E-05 | global batch size: 256 | lm loss: 2.529091E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 802.943 | TFLOPs: 28.11 | +7: iteration 14450/ 21553 | consumed samples: 3699200 | consumed tokens: 7575961600 | elapsed time per iteration (s): 0.33 | learning rate: 6.489E-05 | global batch size: 256 | lm loss: 2.545242E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 786.567 | TFLOPs: 27.54 | +7: iteration 14460/ 21553 | consumed samples: 3701760 | consumed tokens: 7581204480 | elapsed time per iteration (s): 0.30 | learning rate: 6.478E-05 | global batch size: 256 | lm loss: 2.574610E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.037 | TFLOPs: 29.51 | +7: iteration 14470/ 21553 | consumed samples: 3704320 | consumed tokens: 7586447360 | elapsed time per iteration (s): 0.30 | learning rate: 6.466E-05 | global batch size: 256 | lm loss: 2.579346E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.720 | TFLOPs: 29.43 | +7: iteration 14480/ 21553 | consumed samples: 3706880 | consumed tokens: 7591690240 | elapsed time per iteration (s): 0.30 | learning rate: 6.455E-05 | global batch size: 256 | lm loss: 2.541850E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.282 | TFLOPs: 29.56 | +7: iteration 14490/ 21553 | consumed samples: 3709440 | consumed tokens: 7596933120 | elapsed time per iteration (s): 0.31 | learning rate: 6.443E-05 | global batch size: 256 | lm loss: 2.538917E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.211 | TFLOPs: 29.38 | +7: iteration 14500/ 21553 | consumed samples: 3712000 | consumed tokens: 7602176000 | elapsed time per iteration (s): 0.30 | learning rate: 6.432E-05 | global batch size: 256 | lm loss: 2.557298E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.646 | TFLOPs: 29.81 | +7: iteration 14510/ 21553 | consumed samples: 3714560 | consumed tokens: 7607418880 | elapsed time per iteration (s): 0.30 | learning rate: 6.421E-05 | global batch size: 256 | lm loss: 2.573004E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.975 | TFLOPs: 29.83 | +7: iteration 14520/ 21553 | consumed samples: 3717120 | consumed tokens: 7612661760 | elapsed time per iteration (s): 0.30 | learning rate: 6.409E-05 | global batch size: 256 | lm loss: 2.614277E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.470 | TFLOPs: 29.74 | +7: iteration 14530/ 21553 | consumed samples: 3719680 | consumed tokens: 7617904640 | elapsed time per iteration (s): 0.32 | learning rate: 6.398E-05 | global batch size: 256 | lm loss: 2.552359E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.286 | TFLOPs: 28.44 | +7: iteration 14540/ 21553 | consumed samples: 3722240 | consumed tokens: 7623147520 | elapsed time per iteration (s): 0.30 | learning rate: 6.386E-05 | global batch size: 256 | lm loss: 2.566722E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.508 | TFLOPs: 29.81 | +7: iteration 14550/ 21553 | consumed samples: 3724800 | consumed tokens: 7628390400 | elapsed time per iteration (s): 0.30 | learning rate: 6.375E-05 | global batch size: 256 | lm loss: 2.533042E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.854 | TFLOPs: 29.61 | +7: iteration 14560/ 21553 | consumed samples: 3727360 | consumed tokens: 7633633280 | elapsed time per iteration (s): 0.30 | learning rate: 6.364E-05 | global batch size: 256 | lm loss: 2.548905E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.120 | TFLOPs: 29.73 | +7: iteration 14570/ 21553 | consumed samples: 3729920 | consumed tokens: 7638876160 | elapsed time per iteration (s): 0.30 | learning rate: 6.352E-05 | global batch size: 256 | lm loss: 2.551721E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.279 | TFLOPs: 29.63 | +7: iteration 14580/ 21553 | consumed samples: 3732480 | consumed tokens: 7644119040 | elapsed time per iteration (s): 0.30 | learning rate: 6.341E-05 | global batch size: 256 | lm loss: 2.527903E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.285 | TFLOPs: 29.45 | +7: iteration 14590/ 21553 | consumed samples: 3735040 | consumed tokens: 7649361920 | elapsed time per iteration (s): 0.30 | learning rate: 6.330E-05 | global batch size: 256 | lm loss: 2.570539E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.645 | TFLOPs: 29.81 | +7: iteration 14600/ 21553 | consumed samples: 3737600 | consumed tokens: 7654604800 | elapsed time per iteration (s): 0.30 | learning rate: 6.318E-05 | global batch size: 256 | lm loss: 2.581050E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.116 | TFLOPs: 29.73 | +7: iteration 14610/ 21553 | consumed samples: 3740160 | consumed tokens: 7659847680 | elapsed time per iteration (s): 0.30 | learning rate: 6.307E-05 | global batch size: 256 | lm loss: 2.563060E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.590 | TFLOPs: 29.99 | +7: iteration 14620/ 21553 | consumed samples: 3742720 | consumed tokens: 7665090560 | elapsed time per iteration (s): 0.32 | learning rate: 6.296E-05 | global batch size: 256 | lm loss: 2.569780E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.795 | TFLOPs: 28.42 | +7: iteration 14630/ 21553 | consumed samples: 3745280 | consumed tokens: 7670333440 | elapsed time per iteration (s): 0.31 | learning rate: 6.284E-05 | global batch size: 256 | lm loss: 2.533321E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.388 | TFLOPs: 29.31 | +7: iteration 14640/ 21553 | consumed samples: 3747840 | consumed tokens: 7675576320 | elapsed time per iteration (s): 0.30 | learning rate: 6.273E-05 | global batch size: 256 | lm loss: 2.542956E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.066 | TFLOPs: 29.72 | +7: iteration 14650/ 21553 | consumed samples: 3750400 | consumed tokens: 7680819200 | elapsed time per iteration (s): 0.32 | learning rate: 6.262E-05 | global batch size: 256 | lm loss: 2.564781E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.134 | TFLOPs: 28.29 | +7: iteration 14660/ 21553 | consumed samples: 3752960 | consumed tokens: 7686062080 | elapsed time per iteration (s): 0.30 | learning rate: 6.251E-05 | global batch size: 256 | lm loss: 2.537989E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.480 | TFLOPs: 29.60 | +7: iteration 14670/ 21553 | consumed samples: 3755520 | consumed tokens: 7691304960 | elapsed time per iteration (s): 0.30 | learning rate: 6.239E-05 | global batch size: 256 | lm loss: 2.546319E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.899 | TFLOPs: 29.82 | +7: iteration 14680/ 21553 | consumed samples: 3758080 | consumed tokens: 7696547840 | elapsed time per iteration (s): 0.30 | learning rate: 6.228E-05 | global batch size: 256 | lm loss: 2.572897E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.568 | TFLOPs: 29.53 | +7: iteration 14690/ 21553 | consumed samples: 3760640 | consumed tokens: 7701790720 | elapsed time per iteration (s): 0.30 | learning rate: 6.217E-05 | global batch size: 256 | lm loss: 2.539486E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.306 | TFLOPs: 29.63 | +7: iteration 14700/ 21553 | consumed samples: 3763200 | consumed tokens: 7707033600 | elapsed time per iteration (s): 0.31 | learning rate: 6.206E-05 | global batch size: 256 | lm loss: 2.575358E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.967 | TFLOPs: 28.98 | +7: iteration 14710/ 21553 | consumed samples: 3765760 | consumed tokens: 7712276480 | elapsed time per iteration (s): 0.32 | learning rate: 6.194E-05 | global batch size: 256 | lm loss: 2.542106E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 792.639 | TFLOPs: 27.75 | +7: iteration 14720/ 21553 | consumed samples: 3768320 | consumed tokens: 7717519360 | elapsed time per iteration (s): 0.31 | learning rate: 6.183E-05 | global batch size: 256 | lm loss: 2.560979E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.841 | TFLOPs: 29.23 | +7: iteration 14730/ 21553 | consumed samples: 3770880 | consumed tokens: 7722762240 | elapsed time per iteration (s): 0.30 | learning rate: 6.172E-05 | global batch size: 256 | lm loss: 2.561361E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.991 | TFLOPs: 29.79 | +7: iteration 14740/ 21553 | consumed samples: 3773440 | consumed tokens: 7728005120 | elapsed time per iteration (s): 0.31 | learning rate: 6.161E-05 | global batch size: 256 | lm loss: 2.559471E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.383 | TFLOPs: 29.03 | +7: iteration 14750/ 21553 | consumed samples: 3776000 | consumed tokens: 7733248000 | elapsed time per iteration (s): 0.31 | learning rate: 6.150E-05 | global batch size: 256 | lm loss: 2.561457E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.449 | TFLOPs: 29.18 | +7: iteration 14760/ 21553 | consumed samples: 3778560 | consumed tokens: 7738490880 | elapsed time per iteration (s): 0.30 | learning rate: 6.139E-05 | global batch size: 256 | lm loss: 2.537438E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.544 | TFLOPs: 29.57 | +7: iteration 14770/ 21553 | consumed samples: 3781120 | consumed tokens: 7743733760 | elapsed time per iteration (s): 0.30 | learning rate: 6.127E-05 | global batch size: 256 | lm loss: 2.544954E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.512 | TFLOPs: 29.49 | +7: iteration 14780/ 21553 | consumed samples: 3783680 | consumed tokens: 7748976640 | elapsed time per iteration (s): 0.32 | learning rate: 6.116E-05 | global batch size: 256 | lm loss: 2.581574E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.650 | TFLOPs: 28.06 | +7: iteration 14790/ 21553 | consumed samples: 3786240 | consumed tokens: 7754219520 | elapsed time per iteration (s): 0.31 | learning rate: 6.105E-05 | global batch size: 256 | lm loss: 2.572382E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.443 | TFLOPs: 28.69 | +7: iteration 14800/ 21553 | consumed samples: 3788800 | consumed tokens: 7759462400 | elapsed time per iteration (s): 0.30 | learning rate: 6.094E-05 | global batch size: 256 | lm loss: 2.543392E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.855 | TFLOPs: 29.40 | +7: iteration 14810/ 21553 | consumed samples: 3791360 | consumed tokens: 7764705280 | elapsed time per iteration (s): 0.31 | learning rate: 6.083E-05 | global batch size: 256 | lm loss: 2.587545E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.333 | TFLOPs: 28.47 | +7: iteration 14820/ 21553 | consumed samples: 3793920 | consumed tokens: 7769948160 | elapsed time per iteration (s): 0.33 | learning rate: 6.072E-05 | global batch size: 256 | lm loss: 2.529985E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 787.086 | TFLOPs: 27.55 | +7: iteration 14830/ 21553 | consumed samples: 3796480 | consumed tokens: 7775191040 | elapsed time per iteration (s): 0.31 | learning rate: 6.061E-05 | global batch size: 256 | lm loss: 2.559328E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.246 | TFLOPs: 29.31 | +7: iteration 14840/ 21553 | consumed samples: 3799040 | consumed tokens: 7780433920 | elapsed time per iteration (s): 0.31 | learning rate: 6.050E-05 | global batch size: 256 | lm loss: 2.540440E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.640 | TFLOPs: 28.83 | +7: iteration 14850/ 21553 | consumed samples: 3801600 | consumed tokens: 7785676800 | elapsed time per iteration (s): 0.31 | learning rate: 6.039E-05 | global batch size: 256 | lm loss: 2.541762E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.421 | TFLOPs: 29.07 | +7: iteration 14860/ 21553 | consumed samples: 3804160 | consumed tokens: 7790919680 | elapsed time per iteration (s): 0.30 | learning rate: 6.028E-05 | global batch size: 256 | lm loss: 2.532761E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.343 | TFLOPs: 29.59 | +7: iteration 14870/ 21553 | consumed samples: 3806720 | consumed tokens: 7796162560 | elapsed time per iteration (s): 0.31 | learning rate: 6.016E-05 | global batch size: 256 | lm loss: 2.576769E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.900 | TFLOPs: 29.23 | +7: iteration 14880/ 21553 | consumed samples: 3809280 | consumed tokens: 7801405440 | elapsed time per iteration (s): 0.31 | learning rate: 6.005E-05 | global batch size: 256 | lm loss: 2.547014E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.009 | TFLOPs: 29.23 | +7: iteration 14890/ 21553 | consumed samples: 3811840 | consumed tokens: 7806648320 | elapsed time per iteration (s): 0.32 | learning rate: 5.994E-05 | global batch size: 256 | lm loss: 2.527672E+00 | grad norm: 0.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 796.033 | TFLOPs: 27.87 | +7: iteration 14900/ 21553 | consumed samples: 3814400 | consumed tokens: 7811891200 | elapsed time per iteration (s): 0.31 | learning rate: 5.983E-05 | global batch size: 256 | lm loss: 2.530779E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.063 | TFLOPs: 28.81 | +7: iteration 14910/ 21553 | consumed samples: 3816960 | consumed tokens: 7817134080 | elapsed time per iteration (s): 0.30 | learning rate: 5.972E-05 | global batch size: 256 | lm loss: 2.571278E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.363 | TFLOPs: 29.49 | +7: iteration 14920/ 21553 | consumed samples: 3819520 | consumed tokens: 7822376960 | elapsed time per iteration (s): 0.30 | learning rate: 5.961E-05 | global batch size: 256 | lm loss: 2.530407E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.142 | TFLOPs: 29.83 | +7: iteration 14930/ 21553 | consumed samples: 3822080 | consumed tokens: 7827619840 | elapsed time per iteration (s): 0.32 | learning rate: 5.950E-05 | global batch size: 256 | lm loss: 2.535612E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 796.771 | TFLOPs: 27.89 | +7: iteration 14940/ 21553 | consumed samples: 3824640 | consumed tokens: 7832862720 | elapsed time per iteration (s): 0.31 | learning rate: 5.940E-05 | global batch size: 256 | lm loss: 2.558547E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.552 | TFLOPs: 28.94 | +7: iteration 14950/ 21553 | consumed samples: 3827200 | consumed tokens: 7838105600 | elapsed time per iteration (s): 0.31 | learning rate: 5.929E-05 | global batch size: 256 | lm loss: 2.529549E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.138 | TFLOPs: 29.31 | +7: iteration 14960/ 21553 | consumed samples: 3829760 | consumed tokens: 7843348480 | elapsed time per iteration (s): 0.30 | learning rate: 5.918E-05 | global batch size: 256 | lm loss: 2.503419E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.112 | TFLOPs: 29.52 | +7: iteration 14970/ 21553 | consumed samples: 3832320 | consumed tokens: 7848591360 | elapsed time per iteration (s): 0.31 | learning rate: 5.907E-05 | global batch size: 256 | lm loss: 2.542283E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.298 | TFLOPs: 29.31 | +7: iteration 14980/ 21553 | consumed samples: 3834880 | consumed tokens: 7853834240 | elapsed time per iteration (s): 0.30 | learning rate: 5.896E-05 | global batch size: 256 | lm loss: 2.552258E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.056 | TFLOPs: 29.86 | +7: iteration 14990/ 21553 | consumed samples: 3837440 | consumed tokens: 7859077120 | elapsed time per iteration (s): 0.31 | learning rate: 5.885E-05 | global batch size: 256 | lm loss: 2.599643E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.084 | TFLOPs: 29.06 | +7: iteration 15000/ 21553 | consumed samples: 3840000 | consumed tokens: 7864320000 | elapsed time per iteration (s): 0.31 | learning rate: 5.874E-05 | global batch size: 256 | lm loss: 2.539708E+00 | grad norm: 0.330 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.262 | TFLOPs: 29.10 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 15000 | lm loss value: 2.525887E+00 | lm loss PPL: 1.250198E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 15000 to checkpoints_146m +0: [2022-11-25 18:49:26,330] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step15000 is begin to save! +0: [2022-11-25 18:49:26,334] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:49:26,418] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:49:26,418] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:49:26,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:49:26,434] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:49:26,448] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:49:26,449] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:49:26,463] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:49:26,463] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:49:26,478] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:49:26,478] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:49:26,492] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:49:26,492] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:49:26,507] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:49:26,507] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:49:26,521] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:49:26,522] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:49:26,536] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:49:26,536] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:49:26,551] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:49:26,551] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:49:26,566] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:49:26,566] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:49:26,581] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:49:26,581] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:49:26,595] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:49:26,596] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:49:26,610] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:49:26,610] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:49:26,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:49:26,625] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:49:26,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:49:26,639] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:49:26,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:49:26,641] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step15000/mp_rank_00_model_states.pt +0: [2022-11-25 18:49:26,641] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:49:26,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:49:26,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:49:26,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2022-11-25 18:49:26,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:49:26,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:49:26,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:49:26,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +7: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:49:26,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:49:26,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,709] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2022-11-25 18:49:26,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,709] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,709] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:49:26,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:49:26,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:49:26,716] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,716] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,716] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2022-11-25 18:49:26,716] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2022-11-25 18:49:26,724] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:49:26,724] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: successfully saved checkpoint at iteration 15000 to checkpoints_146m +7: time (ms) | save-checkpoint: 399.83 +7: iteration 15010/ 21553 | consumed samples: 3842560 | consumed tokens: 7869562880 | elapsed time per iteration (s): 0.36 | learning rate: 5.863E-05 | global batch size: 256 | lm loss: 2.556521E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 718.102 | TFLOPs: 25.14 | +7: iteration 15020/ 21553 | consumed samples: 3845120 | consumed tokens: 7874805760 | elapsed time per iteration (s): 0.31 | learning rate: 5.852E-05 | global batch size: 256 | lm loss: 2.537073E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.246 | TFLOPs: 29.06 | +7: iteration 15030/ 21553 | consumed samples: 3847680 | consumed tokens: 7880048640 | elapsed time per iteration (s): 0.30 | learning rate: 5.841E-05 | global batch size: 256 | lm loss: 2.567724E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.194 | TFLOPs: 29.52 | +7: iteration 15040/ 21553 | consumed samples: 3850240 | consumed tokens: 7885291520 | elapsed time per iteration (s): 0.31 | learning rate: 5.830E-05 | global batch size: 256 | lm loss: 2.594714E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.906 | TFLOPs: 29.37 | +7: iteration 15050/ 21553 | consumed samples: 3852800 | consumed tokens: 7890534400 | elapsed time per iteration (s): 0.31 | learning rate: 5.820E-05 | global batch size: 256 | lm loss: 2.532467E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.051 | TFLOPs: 29.37 | +7: iteration 15060/ 21553 | consumed samples: 3855360 | consumed tokens: 7895777280 | elapsed time per iteration (s): 0.30 | learning rate: 5.809E-05 | global batch size: 256 | lm loss: 2.531818E+00 | grad norm: 0.264 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.579 | TFLOPs: 29.85 | +7: iteration 15070/ 21553 | consumed samples: 3857920 | consumed tokens: 7901020160 | elapsed time per iteration (s): 0.30 | learning rate: 5.798E-05 | global batch size: 256 | lm loss: 2.547008E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.518 | TFLOPs: 29.53 | +7: iteration 15080/ 21553 | consumed samples: 3860480 | consumed tokens: 7906263040 | elapsed time per iteration (s): 0.31 | learning rate: 5.787E-05 | global batch size: 256 | lm loss: 2.534298E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.300 | TFLOPs: 28.93 | +7: iteration 15090/ 21553 | consumed samples: 3863040 | consumed tokens: 7911505920 | elapsed time per iteration (s): 0.30 | learning rate: 5.776E-05 | global batch size: 256 | lm loss: 2.539417E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.565 | TFLOPs: 29.50 | +7: iteration 15100/ 21553 | consumed samples: 3865600 | consumed tokens: 7916748800 | elapsed time per iteration (s): 0.31 | learning rate: 5.766E-05 | global batch size: 256 | lm loss: 2.532890E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.846 | TFLOPs: 29.05 | +7: iteration 15110/ 21553 | consumed samples: 3868160 | consumed tokens: 7921991680 | elapsed time per iteration (s): 0.30 | learning rate: 5.755E-05 | global batch size: 256 | lm loss: 2.560997E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.536 | TFLOPs: 29.84 | +7: iteration 15120/ 21553 | consumed samples: 3870720 | consumed tokens: 7927234560 | elapsed time per iteration (s): 0.30 | learning rate: 5.744E-05 | global batch size: 256 | lm loss: 2.546210E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.846 | TFLOPs: 29.72 | +7: iteration 15130/ 21553 | consumed samples: 3873280 | consumed tokens: 7932477440 | elapsed time per iteration (s): 0.31 | learning rate: 5.733E-05 | global batch size: 256 | lm loss: 2.528904E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.339 | TFLOPs: 29.31 | +7: iteration 15140/ 21553 | consumed samples: 3875840 | consumed tokens: 7937720320 | elapsed time per iteration (s): 0.31 | learning rate: 5.723E-05 | global batch size: 256 | lm loss: 2.574560E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.487 | TFLOPs: 28.86 | +7: iteration 15150/ 21553 | consumed samples: 3878400 | consumed tokens: 7942963200 | elapsed time per iteration (s): 0.30 | learning rate: 5.712E-05 | global batch size: 256 | lm loss: 2.527702E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.451 | TFLOPs: 29.53 | +7: iteration 15160/ 21553 | consumed samples: 3880960 | consumed tokens: 7948206080 | elapsed time per iteration (s): 0.31 | learning rate: 5.701E-05 | global batch size: 256 | lm loss: 2.543871E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.234 | TFLOPs: 29.27 | +7: iteration 15170/ 21553 | consumed samples: 3883520 | consumed tokens: 7953448960 | elapsed time per iteration (s): 0.30 | learning rate: 5.690E-05 | global batch size: 256 | lm loss: 2.565718E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.685 | TFLOPs: 29.96 | +7: iteration 15180/ 21553 | consumed samples: 3886080 | consumed tokens: 7958691840 | elapsed time per iteration (s): 0.31 | learning rate: 5.680E-05 | global batch size: 256 | lm loss: 2.537295E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.238 | TFLOPs: 29.17 | +7: iteration 15190/ 21553 | consumed samples: 3888640 | consumed tokens: 7963934720 | elapsed time per iteration (s): 0.31 | learning rate: 5.669E-05 | global batch size: 256 | lm loss: 2.542570E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.673 | TFLOPs: 29.01 | +7: iteration 15200/ 21553 | consumed samples: 3891200 | consumed tokens: 7969177600 | elapsed time per iteration (s): 0.32 | learning rate: 5.658E-05 | global batch size: 256 | lm loss: 2.566919E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.811 | TFLOPs: 28.17 | +7: iteration 15210/ 21553 | consumed samples: 3893760 | consumed tokens: 7974420480 | elapsed time per iteration (s): 0.30 | learning rate: 5.648E-05 | global batch size: 256 | lm loss: 2.542007E+00 | grad norm: 0.323 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.439 | TFLOPs: 29.42 | +7: iteration 15220/ 21553 | consumed samples: 3896320 | consumed tokens: 7979663360 | elapsed time per iteration (s): 0.31 | learning rate: 5.637E-05 | global batch size: 256 | lm loss: 2.569505E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.271 | TFLOPs: 28.96 | +7: iteration 15230/ 21553 | consumed samples: 3898880 | consumed tokens: 7984906240 | elapsed time per iteration (s): 0.30 | learning rate: 5.626E-05 | global batch size: 256 | lm loss: 2.549027E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.263 | TFLOPs: 29.52 | +7: iteration 15240/ 21553 | consumed samples: 3901440 | consumed tokens: 7990149120 | elapsed time per iteration (s): 0.31 | learning rate: 5.616E-05 | global batch size: 256 | lm loss: 2.556066E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.466 | TFLOPs: 29.04 | +7: iteration 15250/ 21553 | consumed samples: 3904000 | consumed tokens: 7995392000 | elapsed time per iteration (s): 0.30 | learning rate: 5.605E-05 | global batch size: 256 | lm loss: 2.539439E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.412 | TFLOPs: 29.39 | +7: iteration 15260/ 21553 | consumed samples: 3906560 | consumed tokens: 8000634880 | elapsed time per iteration (s): 0.31 | learning rate: 5.595E-05 | global batch size: 256 | lm loss: 2.567829E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.155 | TFLOPs: 29.27 | +7: iteration 15270/ 21553 | consumed samples: 3909120 | consumed tokens: 8005877760 | elapsed time per iteration (s): 0.30 | learning rate: 5.584E-05 | global batch size: 256 | lm loss: 2.540852E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.380 | TFLOPs: 29.42 | +7: iteration 15280/ 21553 | consumed samples: 3911680 | consumed tokens: 8011120640 | elapsed time per iteration (s): 0.30 | learning rate: 5.573E-05 | global batch size: 256 | lm loss: 2.559118E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.115 | TFLOPs: 29.94 | +7: iteration 15290/ 21553 | consumed samples: 3914240 | consumed tokens: 8016363520 | elapsed time per iteration (s): 0.31 | learning rate: 5.563E-05 | global batch size: 256 | lm loss: 2.534603E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.340 | TFLOPs: 29.14 | +7: iteration 15300/ 21553 | consumed samples: 3916800 | consumed tokens: 8021606400 | elapsed time per iteration (s): 0.31 | learning rate: 5.552E-05 | global batch size: 256 | lm loss: 2.535968E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.113 | TFLOPs: 28.74 | +7: iteration 15310/ 21553 | consumed samples: 3919360 | consumed tokens: 8026849280 | elapsed time per iteration (s): 0.30 | learning rate: 5.542E-05 | global batch size: 256 | lm loss: 2.557943E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.610 | TFLOPs: 29.39 | +7: iteration 15320/ 21553 | consumed samples: 3921920 | consumed tokens: 8032092160 | elapsed time per iteration (s): 0.30 | learning rate: 5.531E-05 | global batch size: 256 | lm loss: 2.546138E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.781 | TFLOPs: 29.54 | +7: iteration 15330/ 21553 | consumed samples: 3924480 | consumed tokens: 8037335040 | elapsed time per iteration (s): 0.30 | learning rate: 5.521E-05 | global batch size: 256 | lm loss: 2.530452E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.431 | TFLOPs: 29.70 | +7: iteration 15340/ 21553 | consumed samples: 3927040 | consumed tokens: 8042577920 | elapsed time per iteration (s): 0.30 | learning rate: 5.510E-05 | global batch size: 256 | lm loss: 2.561822E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.024 | TFLOPs: 29.83 | +7: iteration 15350/ 21553 | consumed samples: 3929600 | consumed tokens: 8047820800 | elapsed time per iteration (s): 0.30 | learning rate: 5.500E-05 | global batch size: 256 | lm loss: 2.546967E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.383 | TFLOPs: 29.73 | +7: iteration 15360/ 21553 | consumed samples: 3932160 | consumed tokens: 8053063680 | elapsed time per iteration (s): 0.32 | learning rate: 5.489E-05 | global batch size: 256 | lm loss: 2.550496E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.857 | TFLOPs: 28.28 | +7: iteration 15370/ 21553 | consumed samples: 3934720 | consumed tokens: 8058306560 | elapsed time per iteration (s): 0.31 | learning rate: 5.479E-05 | global batch size: 256 | lm loss: 2.559224E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.132 | TFLOPs: 29.31 | +7: iteration 15380/ 21553 | consumed samples: 3937280 | consumed tokens: 8063549440 | elapsed time per iteration (s): 0.32 | learning rate: 5.468E-05 | global batch size: 256 | lm loss: 2.522147E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.338 | TFLOPs: 28.26 | +7: iteration 15390/ 21553 | consumed samples: 3939840 | consumed tokens: 8068792320 | elapsed time per iteration (s): 0.30 | learning rate: 5.458E-05 | global batch size: 256 | lm loss: 2.563968E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.154 | TFLOPs: 29.76 | +7: iteration 15400/ 21553 | consumed samples: 3942400 | consumed tokens: 8074035200 | elapsed time per iteration (s): 0.30 | learning rate: 5.447E-05 | global batch size: 256 | lm loss: 2.533403E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.969 | TFLOPs: 29.76 | +7: iteration 15410/ 21553 | consumed samples: 3944960 | consumed tokens: 8079278080 | elapsed time per iteration (s): 0.31 | learning rate: 5.437E-05 | global batch size: 256 | lm loss: 2.579105E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.831 | TFLOPs: 29.33 | +7: iteration 15420/ 21553 | consumed samples: 3947520 | consumed tokens: 8084520960 | elapsed time per iteration (s): 0.30 | learning rate: 5.427E-05 | global batch size: 256 | lm loss: 2.567967E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.507 | TFLOPs: 29.60 | +7: iteration 15430/ 21553 | consumed samples: 3950080 | consumed tokens: 8089763840 | elapsed time per iteration (s): 0.30 | learning rate: 5.416E-05 | global batch size: 256 | lm loss: 2.543121E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.674 | TFLOPs: 29.39 | +7: iteration 15440/ 21553 | consumed samples: 3952640 | consumed tokens: 8095006720 | elapsed time per iteration (s): 0.30 | learning rate: 5.406E-05 | global batch size: 256 | lm loss: 2.541130E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.946 | TFLOPs: 29.54 | +7: iteration 15450/ 21553 | consumed samples: 3955200 | consumed tokens: 8100249600 | elapsed time per iteration (s): 0.30 | learning rate: 5.395E-05 | global batch size: 256 | lm loss: 2.555148E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.735 | TFLOPs: 29.92 | +7: iteration 15460/ 21553 | consumed samples: 3957760 | consumed tokens: 8105492480 | elapsed time per iteration (s): 0.30 | learning rate: 5.385E-05 | global batch size: 256 | lm loss: 2.554650E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.370 | TFLOPs: 29.59 | +7: iteration 15470/ 21553 | consumed samples: 3960320 | consumed tokens: 8110735360 | elapsed time per iteration (s): 0.31 | learning rate: 5.375E-05 | global batch size: 256 | lm loss: 2.552813E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.672 | TFLOPs: 29.25 | +7: iteration 15480/ 21553 | consumed samples: 3962880 | consumed tokens: 8115978240 | elapsed time per iteration (s): 0.30 | learning rate: 5.364E-05 | global batch size: 256 | lm loss: 2.556519E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.709 | TFLOPs: 29.50 | +7: iteration 15490/ 21553 | consumed samples: 3965440 | consumed tokens: 8121221120 | elapsed time per iteration (s): 0.31 | learning rate: 5.354E-05 | global batch size: 256 | lm loss: 2.546391E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.707 | TFLOPs: 29.22 | +7: iteration 15500/ 21553 | consumed samples: 3968000 | consumed tokens: 8126464000 | elapsed time per iteration (s): 0.30 | learning rate: 5.344E-05 | global batch size: 256 | lm loss: 2.550803E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.129 | TFLOPs: 29.66 | +7: iteration 15510/ 21553 | consumed samples: 3970560 | consumed tokens: 8131706880 | elapsed time per iteration (s): 0.31 | learning rate: 5.333E-05 | global batch size: 256 | lm loss: 2.559855E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.179 | TFLOPs: 29.24 | +7: iteration 15520/ 21553 | consumed samples: 3973120 | consumed tokens: 8136949760 | elapsed time per iteration (s): 0.30 | learning rate: 5.323E-05 | global batch size: 256 | lm loss: 2.550403E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.523 | TFLOPs: 29.42 | +7: iteration 15530/ 21553 | consumed samples: 3975680 | consumed tokens: 8142192640 | elapsed time per iteration (s): 0.30 | learning rate: 5.313E-05 | global batch size: 256 | lm loss: 2.572190E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.264 | TFLOPs: 29.73 | +7: iteration 15540/ 21553 | consumed samples: 3978240 | consumed tokens: 8147435520 | elapsed time per iteration (s): 0.31 | learning rate: 5.303E-05 | global batch size: 256 | lm loss: 2.564397E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.032 | TFLOPs: 29.30 | +7: iteration 15550/ 21553 | consumed samples: 3980800 | consumed tokens: 8152678400 | elapsed time per iteration (s): 0.30 | learning rate: 5.292E-05 | global batch size: 256 | lm loss: 2.521836E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.708 | TFLOPs: 29.50 | +7: iteration 15560/ 21553 | consumed samples: 3983360 | consumed tokens: 8157921280 | elapsed time per iteration (s): 0.31 | learning rate: 5.282E-05 | global batch size: 256 | lm loss: 2.549417E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.597 | TFLOPs: 29.18 | +7: iteration 15570/ 21553 | consumed samples: 3985920 | consumed tokens: 8163164160 | elapsed time per iteration (s): 0.30 | learning rate: 5.272E-05 | global batch size: 256 | lm loss: 2.536831E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.996 | TFLOPs: 29.86 | +7: iteration 15580/ 21553 | consumed samples: 3988480 | consumed tokens: 8168407040 | elapsed time per iteration (s): 0.31 | learning rate: 5.262E-05 | global batch size: 256 | lm loss: 2.542167E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.447 | TFLOPs: 29.18 | +7: iteration 15590/ 21553 | consumed samples: 3991040 | consumed tokens: 8173649920 | elapsed time per iteration (s): 0.30 | learning rate: 5.251E-05 | global batch size: 256 | lm loss: 2.569860E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.830 | TFLOPs: 29.82 | +7: iteration 15600/ 21553 | consumed samples: 3993600 | consumed tokens: 8178892800 | elapsed time per iteration (s): 0.30 | learning rate: 5.241E-05 | global batch size: 256 | lm loss: 2.549220E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.717 | TFLOPs: 29.57 | +7: iteration 15610/ 21553 | consumed samples: 3996160 | consumed tokens: 8184135680 | elapsed time per iteration (s): 0.30 | learning rate: 5.231E-05 | global batch size: 256 | lm loss: 2.565299E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.640 | TFLOPs: 29.67 | +7: iteration 15620/ 21553 | consumed samples: 3998720 | consumed tokens: 8189378560 | elapsed time per iteration (s): 0.31 | learning rate: 5.221E-05 | global batch size: 256 | lm loss: 2.526848E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.727 | TFLOPs: 28.49 | +7: iteration 15630/ 21553 | consumed samples: 4001280 | consumed tokens: 8194621440 | elapsed time per iteration (s): 0.31 | learning rate: 5.211E-05 | global batch size: 256 | lm loss: 2.562572E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.885 | TFLOPs: 28.84 | +7: iteration 15640/ 21553 | consumed samples: 4003840 | consumed tokens: 8199864320 | elapsed time per iteration (s): 0.31 | learning rate: 5.201E-05 | global batch size: 256 | lm loss: 2.552765E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.500 | TFLOPs: 29.35 | +7: iteration 15650/ 21553 | consumed samples: 4006400 | consumed tokens: 8205107200 | elapsed time per iteration (s): 0.30 | learning rate: 5.191E-05 | global batch size: 256 | lm loss: 2.586148E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.163 | TFLOPs: 29.45 | +7: iteration 15660/ 21553 | consumed samples: 4008960 | consumed tokens: 8210350080 | elapsed time per iteration (s): 0.30 | learning rate: 5.180E-05 | global batch size: 256 | lm loss: 2.517662E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.739 | TFLOPs: 29.50 | +7: iteration 15670/ 21553 | consumed samples: 4011520 | consumed tokens: 8215592960 | elapsed time per iteration (s): 0.30 | learning rate: 5.170E-05 | global batch size: 256 | lm loss: 2.546167E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.264 | TFLOPs: 29.98 | +7: iteration 15680/ 21553 | consumed samples: 4014080 | consumed tokens: 8220835840 | elapsed time per iteration (s): 0.30 | learning rate: 5.160E-05 | global batch size: 256 | lm loss: 2.533387E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.040 | TFLOPs: 29.72 | +7: iteration 15690/ 21553 | consumed samples: 4016640 | consumed tokens: 8226078720 | elapsed time per iteration (s): 0.30 | learning rate: 5.150E-05 | global batch size: 256 | lm loss: 2.574428E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.376 | TFLOPs: 29.38 | +7: iteration 15700/ 21553 | consumed samples: 4019200 | consumed tokens: 8231321600 | elapsed time per iteration (s): 0.30 | learning rate: 5.140E-05 | global batch size: 256 | lm loss: 2.552794E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.354 | TFLOPs: 29.70 | +7: iteration 15710/ 21553 | consumed samples: 4021760 | consumed tokens: 8236564480 | elapsed time per iteration (s): 0.30 | learning rate: 5.130E-05 | global batch size: 256 | lm loss: 2.567128E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.672 | TFLOPs: 29.39 | +7: iteration 15720/ 21553 | consumed samples: 4024320 | consumed tokens: 8241807360 | elapsed time per iteration (s): 0.31 | learning rate: 5.120E-05 | global batch size: 256 | lm loss: 2.573065E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.208 | TFLOPs: 29.34 | +7: iteration 15730/ 21553 | consumed samples: 4026880 | consumed tokens: 8247050240 | elapsed time per iteration (s): 0.31 | learning rate: 5.110E-05 | global batch size: 256 | lm loss: 2.558873E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.428 | TFLOPs: 29.21 | +7: iteration 15740/ 21553 | consumed samples: 4029440 | consumed tokens: 8252293120 | elapsed time per iteration (s): 0.31 | learning rate: 5.100E-05 | global batch size: 256 | lm loss: 2.553873E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.641 | TFLOPs: 29.04 | +7: iteration 15750/ 21553 | consumed samples: 4032000 | consumed tokens: 8257536000 | elapsed time per iteration (s): 0.31 | learning rate: 5.090E-05 | global batch size: 256 | lm loss: 2.544818E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.816 | TFLOPs: 28.52 | +7: iteration 15760/ 21553 | consumed samples: 4034560 | consumed tokens: 8262778880 | elapsed time per iteration (s): 0.31 | learning rate: 5.080E-05 | global batch size: 256 | lm loss: 2.556535E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.094 | TFLOPs: 29.20 | +7: iteration 15770/ 21553 | consumed samples: 4037120 | consumed tokens: 8268021760 | elapsed time per iteration (s): 0.31 | learning rate: 5.070E-05 | global batch size: 256 | lm loss: 2.556035E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.502 | TFLOPs: 29.25 | +7: iteration 15780/ 21553 | consumed samples: 4039680 | consumed tokens: 8273264640 | elapsed time per iteration (s): 0.30 | learning rate: 5.060E-05 | global batch size: 256 | lm loss: 2.533722E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.575 | TFLOPs: 29.67 | +7: iteration 15790/ 21553 | consumed samples: 4042240 | consumed tokens: 8278507520 | elapsed time per iteration (s): 0.30 | learning rate: 5.050E-05 | global batch size: 256 | lm loss: 2.529167E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.825 | TFLOPs: 29.54 | +7: iteration 15800/ 21553 | consumed samples: 4044800 | consumed tokens: 8283750400 | elapsed time per iteration (s): 0.31 | learning rate: 5.040E-05 | global batch size: 256 | lm loss: 2.526031E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.781 | TFLOPs: 28.87 | +7: iteration 15810/ 21553 | consumed samples: 4047360 | consumed tokens: 8288993280 | elapsed time per iteration (s): 0.31 | learning rate: 5.030E-05 | global batch size: 256 | lm loss: 2.509716E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.998 | TFLOPs: 28.85 | +7: iteration 15820/ 21553 | consumed samples: 4049920 | consumed tokens: 8294236160 | elapsed time per iteration (s): 0.31 | learning rate: 5.020E-05 | global batch size: 256 | lm loss: 2.573064E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.292 | TFLOPs: 29.21 | +7: iteration 15830/ 21553 | consumed samples: 4052480 | consumed tokens: 8299479040 | elapsed time per iteration (s): 0.30 | learning rate: 5.010E-05 | global batch size: 256 | lm loss: 2.551065E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.596 | TFLOPs: 29.39 | +7: iteration 15840/ 21553 | consumed samples: 4055040 | consumed tokens: 8304721920 | elapsed time per iteration (s): 0.30 | learning rate: 5.001E-05 | global batch size: 256 | lm loss: 2.562696E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.736 | TFLOPs: 29.71 | +7: iteration 15850/ 21553 | consumed samples: 4057600 | consumed tokens: 8309964800 | elapsed time per iteration (s): 0.31 | learning rate: 4.991E-05 | global batch size: 256 | lm loss: 2.559734E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.745 | TFLOPs: 29.33 | +7: iteration 15860/ 21553 | consumed samples: 4060160 | consumed tokens: 8315207680 | elapsed time per iteration (s): 0.30 | learning rate: 4.981E-05 | global batch size: 256 | lm loss: 2.542688E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.580 | TFLOPs: 29.67 | +7: iteration 15870/ 21553 | consumed samples: 4062720 | consumed tokens: 8320450560 | elapsed time per iteration (s): 0.31 | learning rate: 4.971E-05 | global batch size: 256 | lm loss: 2.529183E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.717 | TFLOPs: 29.12 | +7: iteration 15880/ 21553 | consumed samples: 4065280 | consumed tokens: 8325693440 | elapsed time per iteration (s): 0.30 | learning rate: 4.961E-05 | global batch size: 256 | lm loss: 2.536445E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.323 | TFLOPs: 29.66 | +7: iteration 15890/ 21553 | consumed samples: 4067840 | consumed tokens: 8330936320 | elapsed time per iteration (s): 0.30 | learning rate: 4.951E-05 | global batch size: 256 | lm loss: 2.548959E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.567 | TFLOPs: 29.50 | +7: iteration 15900/ 21553 | consumed samples: 4070400 | consumed tokens: 8336179200 | elapsed time per iteration (s): 0.30 | learning rate: 4.942E-05 | global batch size: 256 | lm loss: 2.556865E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.701 | TFLOPs: 29.57 | +7: iteration 15910/ 21553 | consumed samples: 4072960 | consumed tokens: 8341422080 | elapsed time per iteration (s): 0.30 | learning rate: 4.932E-05 | global batch size: 256 | lm loss: 2.557385E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.335 | TFLOPs: 29.91 | +7: iteration 15920/ 21553 | consumed samples: 4075520 | consumed tokens: 8346664960 | elapsed time per iteration (s): 0.30 | learning rate: 4.922E-05 | global batch size: 256 | lm loss: 2.508792E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.674 | TFLOPs: 29.64 | +7: iteration 15930/ 21553 | consumed samples: 4078080 | consumed tokens: 8351907840 | elapsed time per iteration (s): 0.33 | learning rate: 4.912E-05 | global batch size: 256 | lm loss: 2.536042E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 771.558 | TFLOPs: 27.01 | +7: iteration 15940/ 21553 | consumed samples: 4080640 | consumed tokens: 8357150720 | elapsed time per iteration (s): 0.30 | learning rate: 4.902E-05 | global batch size: 256 | lm loss: 2.543424E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.524 | TFLOPs: 29.84 | +7: iteration 15950/ 21553 | consumed samples: 4083200 | consumed tokens: 8362393600 | elapsed time per iteration (s): 0.30 | learning rate: 4.893E-05 | global batch size: 256 | lm loss: 2.557369E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.024 | TFLOPs: 29.41 | +7: iteration 15960/ 21553 | consumed samples: 4085760 | consumed tokens: 8367636480 | elapsed time per iteration (s): 0.31 | learning rate: 4.883E-05 | global batch size: 256 | lm loss: 2.530827E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.631 | TFLOPs: 29.32 | +7: iteration 15970/ 21553 | consumed samples: 4088320 | consumed tokens: 8372879360 | elapsed time per iteration (s): 0.30 | learning rate: 4.873E-05 | global batch size: 256 | lm loss: 2.551175E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.316 | TFLOPs: 29.87 | +7: iteration 15980/ 21553 | consumed samples: 4090880 | consumed tokens: 8378122240 | elapsed time per iteration (s): 0.30 | learning rate: 4.864E-05 | global batch size: 256 | lm loss: 2.572000E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.437 | TFLOPs: 29.70 | +7: iteration 15990/ 21553 | consumed samples: 4093440 | consumed tokens: 8383365120 | elapsed time per iteration (s): 0.31 | learning rate: 4.854E-05 | global batch size: 256 | lm loss: 2.560444E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.085 | TFLOPs: 29.20 | +0: [2022-11-25 18:54:32,129] [INFO] [logging.py:68:log_dist] [Rank 0] step=16000, skipped=0, lr=[4.8441849544340955e-05, 4.8441849544340955e-05, 4.8441849544340955e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 16000/ 21553 | consumed samples: 4096000 | consumed tokens: 8388608000 | elapsed time per iteration (s): 0.32 | learning rate: 4.844E-05 | global batch size: 256 | lm loss: 2.548167E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.070 | TFLOPs: 28.39 | +0: steps: 16000 loss: 2.6565 iter time (s): 0.304 samples/sec: 841.333 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 16000 | lm loss value: 2.406712E+00 | lm loss PPL: 1.109741E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 16000 to checkpoints_146m +0: [2022-11-25 18:54:32,253] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step16000 is begin to save! +0: [2022-11-25 18:54:32,257] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:54:32,352] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:54:32,353] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:54:32,368] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:54:32,369] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:54:32,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:54:32,384] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:54:32,400] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:54:32,400] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:54:32,415] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:54:32,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:54:32,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:54:32,432] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:54:32,447] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:54:32,447] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:54:32,463] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:54:32,463] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:54:32,478] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:54:32,478] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:54:32,494] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:54:32,494] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:54:32,509] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:54:32,510] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:54:32,525] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:54:32,525] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:54:32,541] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:54:32,541] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:54:32,556] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:54:32,557] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:54:32,572] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:54:32,572] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:54:32,588] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:54:32,588] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:54:32,589] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:54:32,590] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step16000/mp_rank_00_model_states.pt +0: [2022-11-25 18:54:32,590] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:54:32,592] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:54:32,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:54:32,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 18:54:32,651] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,652] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,652] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,652] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:54:32,652] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,652] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2022-11-25 18:54:32,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,655] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,654] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:54:32,654] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +2: [2022-11-25 18:54:32,654] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:54:32,656] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:54:32,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2022-11-25 18:54:32,657] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:54:32,657] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 18:54:32,657] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,657] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,658] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:54:32,658] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,658] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,658] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,658] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2022-11-25 18:54:32,658] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2022-11-25 18:54:32,660] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:54:32,660] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:54:32,660] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2022-11-25 18:54:32,673] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:54:32,673] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: successfully saved checkpoint at iteration 16000 to checkpoints_146m +7: time (ms) | save-checkpoint: 424.30 +7: iteration 16010/ 21553 | consumed samples: 4098560 | consumed tokens: 8393850880 | elapsed time per iteration (s): 0.35 | learning rate: 4.835E-05 | global batch size: 256 | lm loss: 2.537794E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 727.922 | TFLOPs: 25.48 | +7: iteration 16020/ 21553 | consumed samples: 4101120 | consumed tokens: 8399093760 | elapsed time per iteration (s): 0.30 | learning rate: 4.825E-05 | global batch size: 256 | lm loss: 2.542720E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.667 | TFLOPs: 29.78 | +7: iteration 16030/ 21553 | consumed samples: 4103680 | consumed tokens: 8404336640 | elapsed time per iteration (s): 0.31 | learning rate: 4.815E-05 | global batch size: 256 | lm loss: 2.531994E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.041 | TFLOPs: 29.09 | +7: iteration 16040/ 21553 | consumed samples: 4106240 | consumed tokens: 8409579520 | elapsed time per iteration (s): 0.31 | learning rate: 4.806E-05 | global batch size: 256 | lm loss: 2.574077E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.026 | TFLOPs: 29.30 | +7: iteration 16050/ 21553 | consumed samples: 4108800 | consumed tokens: 8414822400 | elapsed time per iteration (s): 0.31 | learning rate: 4.796E-05 | global batch size: 256 | lm loss: 2.507935E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.806 | TFLOPs: 29.22 | +7: iteration 16060/ 21553 | consumed samples: 4111360 | consumed tokens: 8420065280 | elapsed time per iteration (s): 0.31 | learning rate: 4.786E-05 | global batch size: 256 | lm loss: 2.578510E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.184 | TFLOPs: 29.17 | +7: iteration 16070/ 21553 | consumed samples: 4113920 | consumed tokens: 8425308160 | elapsed time per iteration (s): 0.30 | learning rate: 4.777E-05 | global batch size: 256 | lm loss: 2.542983E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.448 | TFLOPs: 29.95 | +7: iteration 16080/ 21553 | consumed samples: 4116480 | consumed tokens: 8430551040 | elapsed time per iteration (s): 0.31 | learning rate: 4.767E-05 | global batch size: 256 | lm loss: 2.537057E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.523 | TFLOPs: 28.83 | +7: iteration 16090/ 21553 | consumed samples: 4119040 | consumed tokens: 8435793920 | elapsed time per iteration (s): 0.31 | learning rate: 4.758E-05 | global batch size: 256 | lm loss: 2.543508E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.822 | TFLOPs: 28.98 | +7: iteration 16100/ 21553 | consumed samples: 4121600 | consumed tokens: 8441036800 | elapsed time per iteration (s): 0.30 | learning rate: 4.748E-05 | global batch size: 256 | lm loss: 2.521639E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.014 | TFLOPs: 29.48 | +7: iteration 16110/ 21553 | consumed samples: 4124160 | consumed tokens: 8446279680 | elapsed time per iteration (s): 0.31 | learning rate: 4.739E-05 | global batch size: 256 | lm loss: 2.554307E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.329 | TFLOPs: 29.07 | +7: iteration 16120/ 21553 | consumed samples: 4126720 | consumed tokens: 8451522560 | elapsed time per iteration (s): 0.30 | learning rate: 4.729E-05 | global batch size: 256 | lm loss: 2.565523E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.585 | TFLOPs: 29.50 | +7: iteration 16130/ 21553 | consumed samples: 4129280 | consumed tokens: 8456765440 | elapsed time per iteration (s): 0.31 | learning rate: 4.720E-05 | global batch size: 256 | lm loss: 2.532919E+00 | grad norm: 0.309 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.732 | TFLOPs: 29.26 | +7: iteration 16140/ 21553 | consumed samples: 4131840 | consumed tokens: 8462008320 | elapsed time per iteration (s): 0.31 | learning rate: 4.710E-05 | global batch size: 256 | lm loss: 2.515429E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.136 | TFLOPs: 29.17 | +7: iteration 16150/ 21553 | consumed samples: 4134400 | consumed tokens: 8467251200 | elapsed time per iteration (s): 0.31 | learning rate: 4.701E-05 | global batch size: 256 | lm loss: 2.547385E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.496 | TFLOPs: 29.18 | +7: iteration 16160/ 21553 | consumed samples: 4136960 | consumed tokens: 8472494080 | elapsed time per iteration (s): 0.30 | learning rate: 4.691E-05 | global batch size: 256 | lm loss: 2.541065E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.049 | TFLOPs: 29.93 | +7: iteration 16170/ 21553 | consumed samples: 4139520 | consumed tokens: 8477736960 | elapsed time per iteration (s): 0.30 | learning rate: 4.682E-05 | global batch size: 256 | lm loss: 2.529238E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.139 | TFLOPs: 29.94 | +7: iteration 16180/ 21553 | consumed samples: 4142080 | consumed tokens: 8482979840 | elapsed time per iteration (s): 0.30 | learning rate: 4.672E-05 | global batch size: 256 | lm loss: 2.560107E+00 | grad norm: 0.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.015 | TFLOPs: 29.62 | +7: iteration 16190/ 21553 | consumed samples: 4144640 | consumed tokens: 8488222720 | elapsed time per iteration (s): 0.30 | learning rate: 4.663E-05 | global batch size: 256 | lm loss: 2.519464E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.715 | TFLOPs: 29.64 | +7: iteration 16200/ 21553 | consumed samples: 4147200 | consumed tokens: 8493465600 | elapsed time per iteration (s): 0.31 | learning rate: 4.654E-05 | global batch size: 256 | lm loss: 2.537206E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.946 | TFLOPs: 29.16 | +7: iteration 16210/ 21553 | consumed samples: 4149760 | consumed tokens: 8498708480 | elapsed time per iteration (s): 0.30 | learning rate: 4.644E-05 | global batch size: 256 | lm loss: 2.534857E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.981 | TFLOPs: 29.55 | +7: iteration 16220/ 21553 | consumed samples: 4152320 | consumed tokens: 8503951360 | elapsed time per iteration (s): 0.31 | learning rate: 4.635E-05 | global batch size: 256 | lm loss: 2.522532E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.386 | TFLOPs: 28.61 | +7: iteration 16230/ 21553 | consumed samples: 4154880 | consumed tokens: 8509194240 | elapsed time per iteration (s): 0.32 | learning rate: 4.625E-05 | global batch size: 256 | lm loss: 2.534012E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.089 | TFLOPs: 28.36 | +7: iteration 16240/ 21553 | consumed samples: 4157440 | consumed tokens: 8514437120 | elapsed time per iteration (s): 0.31 | learning rate: 4.616E-05 | global batch size: 256 | lm loss: 2.524661E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.670 | TFLOPs: 29.29 | +7: iteration 16250/ 21553 | consumed samples: 4160000 | consumed tokens: 8519680000 | elapsed time per iteration (s): 0.30 | learning rate: 4.607E-05 | global batch size: 256 | lm loss: 2.554034E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.946 | TFLOPs: 29.93 | +7: iteration 16260/ 21553 | consumed samples: 4162560 | consumed tokens: 8524922880 | elapsed time per iteration (s): 0.30 | learning rate: 4.597E-05 | global batch size: 256 | lm loss: 2.529911E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.633 | TFLOPs: 29.85 | +7: iteration 16270/ 21553 | consumed samples: 4165120 | consumed tokens: 8530165760 | elapsed time per iteration (s): 0.30 | learning rate: 4.588E-05 | global batch size: 256 | lm loss: 2.518065E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.892 | TFLOPs: 29.61 | +7: iteration 16280/ 21553 | consumed samples: 4167680 | consumed tokens: 8535408640 | elapsed time per iteration (s): 0.31 | learning rate: 4.579E-05 | global batch size: 256 | lm loss: 2.534796E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.983 | TFLOPs: 29.09 | +7: iteration 16290/ 21553 | consumed samples: 4170240 | consumed tokens: 8540651520 | elapsed time per iteration (s): 0.30 | learning rate: 4.570E-05 | global batch size: 256 | lm loss: 2.548730E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.013 | TFLOPs: 29.69 | +7: iteration 16300/ 21553 | consumed samples: 4172800 | consumed tokens: 8545894400 | elapsed time per iteration (s): 0.30 | learning rate: 4.560E-05 | global batch size: 256 | lm loss: 2.530763E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.780 | TFLOPs: 29.68 | +7: iteration 16310/ 21553 | consumed samples: 4175360 | consumed tokens: 8551137280 | elapsed time per iteration (s): 0.30 | learning rate: 4.551E-05 | global batch size: 256 | lm loss: 2.555529E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.037 | TFLOPs: 29.65 | +7: iteration 16320/ 21553 | consumed samples: 4177920 | consumed tokens: 8556380160 | elapsed time per iteration (s): 0.30 | learning rate: 4.542E-05 | global batch size: 256 | lm loss: 2.559146E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.680 | TFLOPs: 29.57 | +7: iteration 16330/ 21553 | consumed samples: 4180480 | consumed tokens: 8561623040 | elapsed time per iteration (s): 0.30 | learning rate: 4.533E-05 | global batch size: 256 | lm loss: 2.526960E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.336 | TFLOPs: 29.49 | +7: iteration 16340/ 21553 | consumed samples: 4183040 | consumed tokens: 8566865920 | elapsed time per iteration (s): 0.32 | learning rate: 4.523E-05 | global batch size: 256 | lm loss: 2.549574E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 802.463 | TFLOPs: 28.09 | +7: iteration 16350/ 21553 | consumed samples: 4185600 | consumed tokens: 8572108800 | elapsed time per iteration (s): 0.31 | learning rate: 4.514E-05 | global batch size: 256 | lm loss: 2.532674E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.600 | TFLOPs: 29.32 | +7: iteration 16360/ 21553 | consumed samples: 4188160 | consumed tokens: 8577351680 | elapsed time per iteration (s): 0.31 | learning rate: 4.505E-05 | global batch size: 256 | lm loss: 2.552740E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.659 | TFLOPs: 29.22 | +7: iteration 16370/ 21553 | consumed samples: 4190720 | consumed tokens: 8582594560 | elapsed time per iteration (s): 0.31 | learning rate: 4.496E-05 | global batch size: 256 | lm loss: 2.559379E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.552 | TFLOPs: 28.80 | +7: iteration 16380/ 21553 | consumed samples: 4193280 | consumed tokens: 8587837440 | elapsed time per iteration (s): 0.30 | learning rate: 4.487E-05 | global batch size: 256 | lm loss: 2.561757E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.370 | TFLOPs: 29.80 | +7: iteration 16390/ 21553 | consumed samples: 4195840 | consumed tokens: 8593080320 | elapsed time per iteration (s): 0.34 | learning rate: 4.478E-05 | global batch size: 256 | lm loss: 2.545624E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 757.646 | TFLOPs: 26.52 | +7: iteration 16400/ 21553 | consumed samples: 4198400 | consumed tokens: 8598323200 | elapsed time per iteration (s): 0.32 | learning rate: 4.468E-05 | global batch size: 256 | lm loss: 2.553670E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.916 | TFLOPs: 27.65 | +7: iteration 16410/ 21553 | consumed samples: 4200960 | consumed tokens: 8603566080 | elapsed time per iteration (s): 0.31 | learning rate: 4.459E-05 | global batch size: 256 | lm loss: 2.532979E+00 | grad norm: 0.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.738 | TFLOPs: 29.36 | +7: iteration 16420/ 21553 | consumed samples: 4203520 | consumed tokens: 8608808960 | elapsed time per iteration (s): 0.32 | learning rate: 4.450E-05 | global batch size: 256 | lm loss: 2.531130E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.242 | TFLOPs: 27.63 | +7: iteration 16430/ 21553 | consumed samples: 4206080 | consumed tokens: 8614051840 | elapsed time per iteration (s): 0.31 | learning rate: 4.441E-05 | global batch size: 256 | lm loss: 2.544959E+00 | grad norm: 0.262 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.004 | TFLOPs: 29.16 | +7: iteration 16440/ 21553 | consumed samples: 4208640 | consumed tokens: 8619294720 | elapsed time per iteration (s): 0.30 | learning rate: 4.432E-05 | global batch size: 256 | lm loss: 2.537345E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.257 | TFLOPs: 29.59 | +7: iteration 16450/ 21553 | consumed samples: 4211200 | consumed tokens: 8624537600 | elapsed time per iteration (s): 0.31 | learning rate: 4.423E-05 | global batch size: 256 | lm loss: 2.523857E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.775 | TFLOPs: 29.19 | +7: iteration 16460/ 21553 | consumed samples: 4213760 | consumed tokens: 8629780480 | elapsed time per iteration (s): 0.31 | learning rate: 4.414E-05 | global batch size: 256 | lm loss: 2.528732E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.658 | TFLOPs: 29.04 | +7: iteration 16470/ 21553 | consumed samples: 4216320 | consumed tokens: 8635023360 | elapsed time per iteration (s): 0.30 | learning rate: 4.405E-05 | global batch size: 256 | lm loss: 2.544538E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.428 | TFLOPs: 29.60 | +7: iteration 16480/ 21553 | consumed samples: 4218880 | consumed tokens: 8640266240 | elapsed time per iteration (s): 0.30 | learning rate: 4.396E-05 | global batch size: 256 | lm loss: 2.567534E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.973 | TFLOPs: 29.48 | +7: iteration 16490/ 21553 | consumed samples: 4221440 | consumed tokens: 8645509120 | elapsed time per iteration (s): 0.31 | learning rate: 4.387E-05 | global batch size: 256 | lm loss: 2.528342E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.789 | TFLOPs: 29.19 | +7: iteration 16500/ 21553 | consumed samples: 4224000 | consumed tokens: 8650752000 | elapsed time per iteration (s): 0.31 | learning rate: 4.378E-05 | global batch size: 256 | lm loss: 2.553882E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.784 | TFLOPs: 28.56 | +7: iteration 16510/ 21553 | consumed samples: 4226560 | consumed tokens: 8655994880 | elapsed time per iteration (s): 0.31 | learning rate: 4.369E-05 | global batch size: 256 | lm loss: 2.533578E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.075 | TFLOPs: 28.88 | +7: iteration 16520/ 21553 | consumed samples: 4229120 | consumed tokens: 8661237760 | elapsed time per iteration (s): 0.31 | learning rate: 4.360E-05 | global batch size: 256 | lm loss: 2.549250E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.789 | TFLOPs: 29.05 | +7: iteration 16530/ 21553 | consumed samples: 4231680 | consumed tokens: 8666480640 | elapsed time per iteration (s): 0.30 | learning rate: 4.351E-05 | global batch size: 256 | lm loss: 2.543926E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.649 | TFLOPs: 29.57 | +7: iteration 16540/ 21553 | consumed samples: 4234240 | consumed tokens: 8671723520 | elapsed time per iteration (s): 0.31 | learning rate: 4.342E-05 | global batch size: 256 | lm loss: 2.530238E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.160 | TFLOPs: 28.61 | +7: iteration 16550/ 21553 | consumed samples: 4236800 | consumed tokens: 8676966400 | elapsed time per iteration (s): 0.30 | learning rate: 4.333E-05 | global batch size: 256 | lm loss: 2.553505E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.328 | TFLOPs: 29.70 | +7: iteration 16560/ 21553 | consumed samples: 4239360 | consumed tokens: 8682209280 | elapsed time per iteration (s): 0.30 | learning rate: 4.324E-05 | global batch size: 256 | lm loss: 2.512118E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.526 | TFLOPs: 29.60 | +7: iteration 16570/ 21553 | consumed samples: 4241920 | consumed tokens: 8687452160 | elapsed time per iteration (s): 0.31 | learning rate: 4.315E-05 | global batch size: 256 | lm loss: 2.517787E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.754 | TFLOPs: 28.98 | +7: iteration 16580/ 21553 | consumed samples: 4244480 | consumed tokens: 8692695040 | elapsed time per iteration (s): 0.31 | learning rate: 4.307E-05 | global batch size: 256 | lm loss: 2.557744E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.306 | TFLOPs: 28.89 | +7: iteration 16590/ 21553 | consumed samples: 4247040 | consumed tokens: 8697937920 | elapsed time per iteration (s): 0.31 | learning rate: 4.298E-05 | global batch size: 256 | lm loss: 2.556402E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.641 | TFLOPs: 29.01 | +7: iteration 16600/ 21553 | consumed samples: 4249600 | consumed tokens: 8703180800 | elapsed time per iteration (s): 0.30 | learning rate: 4.289E-05 | global batch size: 256 | lm loss: 2.501528E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.001 | TFLOPs: 29.76 | +7: iteration 16610/ 21553 | consumed samples: 4252160 | consumed tokens: 8708423680 | elapsed time per iteration (s): 0.30 | learning rate: 4.280E-05 | global batch size: 256 | lm loss: 2.564690E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.545 | TFLOPs: 29.57 | +7: iteration 16620/ 21553 | consumed samples: 4254720 | consumed tokens: 8713666560 | elapsed time per iteration (s): 0.31 | learning rate: 4.271E-05 | global batch size: 256 | lm loss: 2.543514E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.612 | TFLOPs: 28.76 | +7: iteration 16630/ 21553 | consumed samples: 4257280 | consumed tokens: 8718909440 | elapsed time per iteration (s): 0.31 | learning rate: 4.263E-05 | global batch size: 256 | lm loss: 2.546386E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.607 | TFLOPs: 28.80 | +7: iteration 16640/ 21553 | consumed samples: 4259840 | consumed tokens: 8724152320 | elapsed time per iteration (s): 0.30 | learning rate: 4.254E-05 | global batch size: 256 | lm loss: 2.534413E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.856 | TFLOPs: 29.75 | +7: iteration 16650/ 21553 | consumed samples: 4262400 | consumed tokens: 8729395200 | elapsed time per iteration (s): 0.31 | learning rate: 4.245E-05 | global batch size: 256 | lm loss: 2.523925E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.799 | TFLOPs: 29.12 | +7: iteration 16660/ 21553 | consumed samples: 4264960 | consumed tokens: 8734638080 | elapsed time per iteration (s): 0.30 | learning rate: 4.236E-05 | global batch size: 256 | lm loss: 2.532123E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.852 | TFLOPs: 29.47 | +7: iteration 16670/ 21553 | consumed samples: 4267520 | consumed tokens: 8739880960 | elapsed time per iteration (s): 0.31 | learning rate: 4.227E-05 | global batch size: 256 | lm loss: 2.516809E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.950 | TFLOPs: 28.95 | +7: iteration 16680/ 21553 | consumed samples: 4270080 | consumed tokens: 8745123840 | elapsed time per iteration (s): 0.31 | learning rate: 4.219E-05 | global batch size: 256 | lm loss: 2.544324E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.461 | TFLOPs: 28.65 | +7: iteration 16690/ 21553 | consumed samples: 4272640 | consumed tokens: 8750366720 | elapsed time per iteration (s): 0.30 | learning rate: 4.210E-05 | global batch size: 256 | lm loss: 2.510474E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.939 | TFLOPs: 29.51 | +7: iteration 16700/ 21553 | consumed samples: 4275200 | consumed tokens: 8755609600 | elapsed time per iteration (s): 0.30 | learning rate: 4.201E-05 | global batch size: 256 | lm loss: 2.547476E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.795 | TFLOPs: 29.89 | +7: iteration 16710/ 21553 | consumed samples: 4277760 | consumed tokens: 8760852480 | elapsed time per iteration (s): 0.30 | learning rate: 4.193E-05 | global batch size: 256 | lm loss: 2.540780E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.217 | TFLOPs: 29.62 | +7: iteration 16720/ 21553 | consumed samples: 4280320 | consumed tokens: 8766095360 | elapsed time per iteration (s): 0.31 | learning rate: 4.184E-05 | global batch size: 256 | lm loss: 2.485124E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.924 | TFLOPs: 29.37 | +7: iteration 16730/ 21553 | consumed samples: 4282880 | consumed tokens: 8771338240 | elapsed time per iteration (s): 0.31 | learning rate: 4.175E-05 | global batch size: 256 | lm loss: 2.570201E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.699 | TFLOPs: 29.19 | +7: iteration 16740/ 21553 | consumed samples: 4285440 | consumed tokens: 8776581120 | elapsed time per iteration (s): 0.31 | learning rate: 4.167E-05 | global batch size: 256 | lm loss: 2.539256E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.171 | TFLOPs: 29.10 | +7: iteration 16750/ 21553 | consumed samples: 4288000 | consumed tokens: 8781824000 | elapsed time per iteration (s): 0.30 | learning rate: 4.158E-05 | global batch size: 256 | lm loss: 2.546323E+00 | grad norm: 0.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.734 | TFLOPs: 29.96 | +7: iteration 16760/ 21553 | consumed samples: 4290560 | consumed tokens: 8787066880 | elapsed time per iteration (s): 0.31 | learning rate: 4.150E-05 | global batch size: 256 | lm loss: 2.540267E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.811 | TFLOPs: 29.33 | +7: iteration 16770/ 21553 | consumed samples: 4293120 | consumed tokens: 8792309760 | elapsed time per iteration (s): 0.31 | learning rate: 4.141E-05 | global batch size: 256 | lm loss: 2.513837E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.809 | TFLOPs: 29.15 | +7: iteration 16780/ 21553 | consumed samples: 4295680 | consumed tokens: 8797552640 | elapsed time per iteration (s): 0.30 | learning rate: 4.132E-05 | global batch size: 256 | lm loss: 2.542107E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.575 | TFLOPs: 29.92 | +7: iteration 16790/ 21553 | consumed samples: 4298240 | consumed tokens: 8802795520 | elapsed time per iteration (s): 0.30 | learning rate: 4.124E-05 | global batch size: 256 | lm loss: 2.511794E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.740 | TFLOPs: 29.40 | +7: iteration 16800/ 21553 | consumed samples: 4300800 | consumed tokens: 8808038400 | elapsed time per iteration (s): 0.31 | learning rate: 4.115E-05 | global batch size: 256 | lm loss: 2.540949E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.089 | TFLOPs: 29.02 | +7: iteration 16810/ 21553 | consumed samples: 4303360 | consumed tokens: 8813281280 | elapsed time per iteration (s): 0.30 | learning rate: 4.107E-05 | global batch size: 256 | lm loss: 2.510197E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.491 | TFLOPs: 29.70 | +7: iteration 16820/ 21553 | consumed samples: 4305920 | consumed tokens: 8818524160 | elapsed time per iteration (s): 0.31 | learning rate: 4.098E-05 | global batch size: 256 | lm loss: 2.509438E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.949 | TFLOPs: 28.98 | +7: iteration 16830/ 21553 | consumed samples: 4308480 | consumed tokens: 8823767040 | elapsed time per iteration (s): 0.30 | learning rate: 4.090E-05 | global batch size: 256 | lm loss: 2.531036E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.980 | TFLOPs: 29.62 | +7: iteration 16840/ 21553 | consumed samples: 4311040 | consumed tokens: 8829009920 | elapsed time per iteration (s): 0.30 | learning rate: 4.081E-05 | global batch size: 256 | lm loss: 2.510848E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.160 | TFLOPs: 29.76 | +7: iteration 16850/ 21553 | consumed samples: 4313600 | consumed tokens: 8834252800 | elapsed time per iteration (s): 0.30 | learning rate: 4.073E-05 | global batch size: 256 | lm loss: 2.530679E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.579 | TFLOPs: 29.57 | +7: iteration 16860/ 21553 | consumed samples: 4316160 | consumed tokens: 8839495680 | elapsed time per iteration (s): 0.30 | learning rate: 4.064E-05 | global batch size: 256 | lm loss: 2.545185E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.338 | TFLOPs: 29.56 | +7: iteration 16870/ 21553 | consumed samples: 4318720 | consumed tokens: 8844738560 | elapsed time per iteration (s): 0.31 | learning rate: 4.056E-05 | global batch size: 256 | lm loss: 2.549067E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.503 | TFLOPs: 29.00 | +7: iteration 16880/ 21553 | consumed samples: 4321280 | consumed tokens: 8849981440 | elapsed time per iteration (s): 0.31 | learning rate: 4.047E-05 | global batch size: 256 | lm loss: 2.512022E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.572 | TFLOPs: 29.04 | +7: iteration 16890/ 21553 | consumed samples: 4323840 | consumed tokens: 8855224320 | elapsed time per iteration (s): 0.32 | learning rate: 4.039E-05 | global batch size: 256 | lm loss: 2.547599E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.067 | TFLOPs: 27.62 | +7: iteration 16900/ 21553 | consumed samples: 4326400 | consumed tokens: 8860467200 | elapsed time per iteration (s): 0.30 | learning rate: 4.031E-05 | global batch size: 256 | lm loss: 2.519660E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.227 | TFLOPs: 29.45 | +7: iteration 16910/ 21553 | consumed samples: 4328960 | consumed tokens: 8865710080 | elapsed time per iteration (s): 0.32 | learning rate: 4.022E-05 | global batch size: 256 | lm loss: 2.512088E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 794.822 | TFLOPs: 27.82 | +7: iteration 16920/ 21553 | consumed samples: 4331520 | consumed tokens: 8870952960 | elapsed time per iteration (s): 0.30 | learning rate: 4.014E-05 | global batch size: 256 | lm loss: 2.512210E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.315 | TFLOPs: 29.77 | +7: iteration 16930/ 21553 | consumed samples: 4334080 | consumed tokens: 8876195840 | elapsed time per iteration (s): 0.30 | learning rate: 4.006E-05 | global batch size: 256 | lm loss: 2.516054E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.329 | TFLOPs: 29.66 | +7: iteration 16940/ 21553 | consumed samples: 4336640 | consumed tokens: 8881438720 | elapsed time per iteration (s): 0.30 | learning rate: 3.997E-05 | global batch size: 256 | lm loss: 2.533226E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.604 | TFLOPs: 29.78 | +7: iteration 16950/ 21553 | consumed samples: 4339200 | consumed tokens: 8886681600 | elapsed time per iteration (s): 0.31 | learning rate: 3.989E-05 | global batch size: 256 | lm loss: 2.509463E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.724 | TFLOPs: 29.05 | +7: iteration 16960/ 21553 | consumed samples: 4341760 | consumed tokens: 8891924480 | elapsed time per iteration (s): 0.30 | learning rate: 3.981E-05 | global batch size: 256 | lm loss: 2.529752E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.074 | TFLOPs: 29.44 | +7: iteration 16970/ 21553 | consumed samples: 4344320 | consumed tokens: 8897167360 | elapsed time per iteration (s): 0.30 | learning rate: 3.972E-05 | global batch size: 256 | lm loss: 2.526478E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.317 | TFLOPs: 29.70 | +7: iteration 16980/ 21553 | consumed samples: 4346880 | consumed tokens: 8902410240 | elapsed time per iteration (s): 0.30 | learning rate: 3.964E-05 | global batch size: 256 | lm loss: 2.563015E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.576 | TFLOPs: 29.57 | +7: iteration 16990/ 21553 | consumed samples: 4349440 | consumed tokens: 8907653120 | elapsed time per iteration (s): 0.31 | learning rate: 3.956E-05 | global batch size: 256 | lm loss: 2.549234E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.270 | TFLOPs: 29.24 | +7: iteration 17000/ 21553 | consumed samples: 4352000 | consumed tokens: 8912896000 | elapsed time per iteration (s): 0.30 | learning rate: 3.948E-05 | global batch size: 256 | lm loss: 2.548369E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.955 | TFLOPs: 29.68 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 17000 | lm loss value: 2.511586E+00 | lm loss PPL: 1.232446E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 17000 to checkpoints_146m +0: [2022-11-25 18:59:39,246] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step17000 is begin to save! +0: [2022-11-25 18:59:39,251] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_01-model_00-model_states.pt... +0: [2022-11-25 18:59:39,345] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_01-model_00-model_states.pt. +0: [2022-11-25 18:59:39,346] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_03-model_00-model_states.pt... +0: [2022-11-25 18:59:39,362] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_03-model_00-model_states.pt. +0: [2022-11-25 18:59:39,363] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_04-model_00-model_states.pt... +0: [2022-11-25 18:59:39,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_04-model_00-model_states.pt. +0: [2022-11-25 18:59:39,378] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_05-model_00-model_states.pt... +0: [2022-11-25 18:59:39,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_05-model_00-model_states.pt. +0: [2022-11-25 18:59:39,394] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_06-model_00-model_states.pt... +0: [2022-11-25 18:59:39,409] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_06-model_00-model_states.pt. +0: [2022-11-25 18:59:39,410] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_07-model_00-model_states.pt... +0: [2022-11-25 18:59:39,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_07-model_00-model_states.pt. +0: [2022-11-25 18:59:39,425] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_08-model_00-model_states.pt... +0: [2022-11-25 18:59:39,440] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_08-model_00-model_states.pt. +0: [2022-11-25 18:59:39,441] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_09-model_00-model_states.pt... +0: [2022-11-25 18:59:39,456] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_09-model_00-model_states.pt. +0: [2022-11-25 18:59:39,456] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_10-model_00-model_states.pt... +0: [2022-11-25 18:59:39,471] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_10-model_00-model_states.pt. +0: [2022-11-25 18:59:39,472] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_11-model_00-model_states.pt... +0: [2022-11-25 18:59:39,487] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_11-model_00-model_states.pt. +0: [2022-11-25 18:59:39,488] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_12-model_00-model_states.pt... +0: [2022-11-25 18:59:39,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_12-model_00-model_states.pt. +0: [2022-11-25 18:59:39,503] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_13-model_00-model_states.pt... +0: [2022-11-25 18:59:39,518] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_13-model_00-model_states.pt. +0: [2022-11-25 18:59:39,519] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_14-model_00-model_states.pt... +0: [2022-11-25 18:59:39,534] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_14-model_00-model_states.pt. +0: [2022-11-25 18:59:39,534] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_15-model_00-model_states.pt... +0: [2022-11-25 18:59:39,549] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_15-model_00-model_states.pt. +0: [2022-11-25 18:59:39,550] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_16-model_00-model_states.pt... +0: [2022-11-25 18:59:39,565] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_16-model_00-model_states.pt. +0: [2022-11-25 18:59:39,565] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_17-model_00-model_states.pt... +0: [2022-11-25 18:59:39,581] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_17-model_00-model_states.pt. +0: [2022-11-25 18:59:39,581] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/layer_19-model_00-model_states.pt... +0: [2022-11-25 18:59:39,582] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/layer_19-model_00-model_states.pt. +0: [2022-11-25 18:59:39,583] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step17000/mp_rank_00_model_states.pt +0: [2022-11-25 18:59:39,583] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/mp_rank_00_model_states.pt... +0: [2022-11-25 18:59:39,585] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/mp_rank_00_model_states.pt. +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +4: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +7: [2022-11-25 18:59:39,603] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2022-11-25 18:59:39,635] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,635] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,635] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2022-11-25 18:59:39,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2022-11-25 18:59:39,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2022-11-25 18:59:39,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:59:39,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 18:59:39,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 18:59:39,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,648] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,648] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 18:59:39,646] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:59:39,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 18:59:39,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,649] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,649] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,646] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2022-11-25 18:59:39,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2022-11-25 18:59:39,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 18:59:39,651] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 18:59:39,652] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 18:59:39,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,653] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 18:59:39,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,653] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,654] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 18:59:39,654] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,654] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2022-11-25 18:59:39,654] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2022-11-25 18:59:39,670] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 18:59:39,670] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: successfully saved checkpoint at iteration 17000 to checkpoints_146m +7: time (ms) | save-checkpoint: 431.31 +7: iteration 17010/ 21553 | consumed samples: 4354560 | consumed tokens: 8918138880 | elapsed time per iteration (s): 0.35 | learning rate: 3.939E-05 | global batch size: 256 | lm loss: 2.540890E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 722.828 | TFLOPs: 25.30 | +7: iteration 17020/ 21553 | consumed samples: 4357120 | consumed tokens: 8923381760 | elapsed time per iteration (s): 0.31 | learning rate: 3.931E-05 | global batch size: 256 | lm loss: 2.556537E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.253 | TFLOPs: 29.20 | +7: iteration 17030/ 21553 | consumed samples: 4359680 | consumed tokens: 8928624640 | elapsed time per iteration (s): 0.31 | learning rate: 3.923E-05 | global batch size: 256 | lm loss: 2.550512E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.969 | TFLOPs: 29.09 | +7: iteration 17040/ 21553 | consumed samples: 4362240 | consumed tokens: 8933867520 | elapsed time per iteration (s): 0.30 | learning rate: 3.915E-05 | global batch size: 256 | lm loss: 2.549754E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.279 | TFLOPs: 29.59 | +7: iteration 17050/ 21553 | consumed samples: 4364800 | consumed tokens: 8939110400 | elapsed time per iteration (s): 0.31 | learning rate: 3.907E-05 | global batch size: 256 | lm loss: 2.555241E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.396 | TFLOPs: 28.93 | +7: iteration 17060/ 21553 | consumed samples: 4367360 | consumed tokens: 8944353280 | elapsed time per iteration (s): 0.30 | learning rate: 3.898E-05 | global batch size: 256 | lm loss: 2.481861E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.025 | TFLOPs: 29.58 | +7: iteration 17070/ 21553 | consumed samples: 4369920 | consumed tokens: 8949596160 | elapsed time per iteration (s): 0.30 | learning rate: 3.890E-05 | global batch size: 256 | lm loss: 2.537966E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.133 | TFLOPs: 29.52 | +7: iteration 17080/ 21553 | consumed samples: 4372480 | consumed tokens: 8954839040 | elapsed time per iteration (s): 0.31 | learning rate: 3.882E-05 | global batch size: 256 | lm loss: 2.540231E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.846 | TFLOPs: 29.33 | +7: iteration 17090/ 21553 | consumed samples: 4375040 | consumed tokens: 8960081920 | elapsed time per iteration (s): 0.30 | learning rate: 3.874E-05 | global batch size: 256 | lm loss: 2.537112E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.565 | TFLOPs: 29.88 | +7: iteration 17100/ 21553 | consumed samples: 4377600 | consumed tokens: 8965324800 | elapsed time per iteration (s): 0.31 | learning rate: 3.866E-05 | global batch size: 256 | lm loss: 2.530683E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.206 | TFLOPs: 29.24 | +7: iteration 17110/ 21553 | consumed samples: 4380160 | consumed tokens: 8970567680 | elapsed time per iteration (s): 0.30 | learning rate: 3.858E-05 | global batch size: 256 | lm loss: 2.520655E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.329 | TFLOPs: 29.70 | +7: iteration 17120/ 21553 | consumed samples: 4382720 | consumed tokens: 8975810560 | elapsed time per iteration (s): 0.30 | learning rate: 3.850E-05 | global batch size: 256 | lm loss: 2.569344E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.472 | TFLOPs: 29.42 | +7: iteration 17130/ 21553 | consumed samples: 4385280 | consumed tokens: 8981053440 | elapsed time per iteration (s): 0.31 | learning rate: 3.842E-05 | global batch size: 256 | lm loss: 2.511192E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.410 | TFLOPs: 29.28 | +7: iteration 17140/ 21553 | consumed samples: 4387840 | consumed tokens: 8986296320 | elapsed time per iteration (s): 0.30 | learning rate: 3.834E-05 | global batch size: 256 | lm loss: 2.528902E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.375 | TFLOPs: 29.87 | +7: iteration 17150/ 21553 | consumed samples: 4390400 | consumed tokens: 8991539200 | elapsed time per iteration (s): 0.30 | learning rate: 3.826E-05 | global batch size: 256 | lm loss: 2.533636E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.567 | TFLOPs: 29.53 | +7: iteration 17160/ 21553 | consumed samples: 4392960 | consumed tokens: 8996782080 | elapsed time per iteration (s): 0.33 | learning rate: 3.818E-05 | global batch size: 256 | lm loss: 2.526795E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 783.319 | TFLOPs: 27.42 | +7: iteration 17170/ 21553 | consumed samples: 4395520 | consumed tokens: 9002024960 | elapsed time per iteration (s): 0.30 | learning rate: 3.810E-05 | global batch size: 256 | lm loss: 2.553157E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.142 | TFLOPs: 29.45 | +7: iteration 17180/ 21553 | consumed samples: 4398080 | consumed tokens: 9007267840 | elapsed time per iteration (s): 0.31 | learning rate: 3.802E-05 | global batch size: 256 | lm loss: 2.523242E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.060 | TFLOPs: 29.27 | +7: iteration 17190/ 21553 | consumed samples: 4400640 | consumed tokens: 9012510720 | elapsed time per iteration (s): 0.30 | learning rate: 3.794E-05 | global batch size: 256 | lm loss: 2.552011E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.715 | TFLOPs: 29.68 | +7: iteration 17200/ 21553 | consumed samples: 4403200 | consumed tokens: 9017753600 | elapsed time per iteration (s): 0.31 | learning rate: 3.786E-05 | global batch size: 256 | lm loss: 2.488738E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.405 | TFLOPs: 29.28 | +7: iteration 17210/ 21553 | consumed samples: 4405760 | consumed tokens: 9022996480 | elapsed time per iteration (s): 0.30 | learning rate: 3.778E-05 | global batch size: 256 | lm loss: 2.511787E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.993 | TFLOPs: 29.86 | +7: iteration 17220/ 21553 | consumed samples: 4408320 | consumed tokens: 9028239360 | elapsed time per iteration (s): 0.32 | learning rate: 3.770E-05 | global batch size: 256 | lm loss: 2.508052E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.122 | TFLOPs: 28.36 | +7: iteration 17230/ 21553 | consumed samples: 4410880 | consumed tokens: 9033482240 | elapsed time per iteration (s): 0.30 | learning rate: 3.762E-05 | global batch size: 256 | lm loss: 2.555430E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.094 | TFLOPs: 29.69 | +7: iteration 17240/ 21553 | consumed samples: 4413440 | consumed tokens: 9038725120 | elapsed time per iteration (s): 0.31 | learning rate: 3.754E-05 | global batch size: 256 | lm loss: 2.554584E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.929 | TFLOPs: 28.91 | +7: iteration 17250/ 21553 | consumed samples: 4416000 | consumed tokens: 9043968000 | elapsed time per iteration (s): 0.31 | learning rate: 3.747E-05 | global batch size: 256 | lm loss: 2.521580E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.886 | TFLOPs: 29.23 | +7: iteration 17260/ 21553 | consumed samples: 4418560 | consumed tokens: 9049210880 | elapsed time per iteration (s): 0.31 | learning rate: 3.739E-05 | global batch size: 256 | lm loss: 2.529437E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.324 | TFLOPs: 28.82 | +7: iteration 17270/ 21553 | consumed samples: 4421120 | consumed tokens: 9054453760 | elapsed time per iteration (s): 0.30 | learning rate: 3.731E-05 | global batch size: 256 | lm loss: 2.552943E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.830 | TFLOPs: 29.44 | +7: iteration 17280/ 21553 | consumed samples: 4423680 | consumed tokens: 9059696640 | elapsed time per iteration (s): 0.30 | learning rate: 3.723E-05 | global batch size: 256 | lm loss: 2.534046E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.373 | TFLOPs: 29.70 | +7: iteration 17290/ 21553 | consumed samples: 4426240 | consumed tokens: 9064939520 | elapsed time per iteration (s): 0.31 | learning rate: 3.715E-05 | global batch size: 256 | lm loss: 2.539519E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.325 | TFLOPs: 29.24 | +7: iteration 17300/ 21553 | consumed samples: 4428800 | consumed tokens: 9070182400 | elapsed time per iteration (s): 0.30 | learning rate: 3.708E-05 | global batch size: 256 | lm loss: 2.542767E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.320 | TFLOPs: 29.98 | +7: iteration 17310/ 21553 | consumed samples: 4431360 | consumed tokens: 9075425280 | elapsed time per iteration (s): 0.32 | learning rate: 3.700E-05 | global batch size: 256 | lm loss: 2.510893E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.484 | TFLOPs: 28.06 | +7: iteration 17320/ 21553 | consumed samples: 4433920 | consumed tokens: 9080668160 | elapsed time per iteration (s): 0.31 | learning rate: 3.692E-05 | global batch size: 256 | lm loss: 2.521770E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.619 | TFLOPs: 29.22 | +7: iteration 17330/ 21553 | consumed samples: 4436480 | consumed tokens: 9085911040 | elapsed time per iteration (s): 0.30 | learning rate: 3.684E-05 | global batch size: 256 | lm loss: 2.544385E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.031 | TFLOPs: 29.79 | +7: iteration 17340/ 21553 | consumed samples: 4439040 | consumed tokens: 9091153920 | elapsed time per iteration (s): 0.31 | learning rate: 3.677E-05 | global batch size: 256 | lm loss: 2.521149E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.794 | TFLOPs: 28.91 | +7: iteration 17350/ 21553 | consumed samples: 4441600 | consumed tokens: 9096396800 | elapsed time per iteration (s): 0.30 | learning rate: 3.669E-05 | global batch size: 256 | lm loss: 2.510289E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.162 | TFLOPs: 29.90 | +7: iteration 17360/ 21553 | consumed samples: 4444160 | consumed tokens: 9101639680 | elapsed time per iteration (s): 0.30 | learning rate: 3.661E-05 | global batch size: 256 | lm loss: 2.500262E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.113 | TFLOPs: 29.48 | +7: iteration 17370/ 21553 | consumed samples: 4446720 | consumed tokens: 9106882560 | elapsed time per iteration (s): 0.30 | learning rate: 3.654E-05 | global batch size: 256 | lm loss: 2.508537E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.235 | TFLOPs: 29.83 | +7: iteration 17380/ 21553 | consumed samples: 4449280 | consumed tokens: 9112125440 | elapsed time per iteration (s): 0.32 | learning rate: 3.646E-05 | global batch size: 256 | lm loss: 2.538815E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 802.947 | TFLOPs: 28.11 | +7: iteration 17390/ 21553 | consumed samples: 4451840 | consumed tokens: 9117368320 | elapsed time per iteration (s): 0.31 | learning rate: 3.638E-05 | global batch size: 256 | lm loss: 2.562416E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.143 | TFLOPs: 29.20 | +7: iteration 17400/ 21553 | consumed samples: 4454400 | consumed tokens: 9122611200 | elapsed time per iteration (s): 0.31 | learning rate: 3.631E-05 | global batch size: 256 | lm loss: 2.535410E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.229 | TFLOPs: 28.85 | +7: iteration 17410/ 21553 | consumed samples: 4456960 | consumed tokens: 9127854080 | elapsed time per iteration (s): 0.30 | learning rate: 3.623E-05 | global batch size: 256 | lm loss: 2.524565E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.500 | TFLOPs: 29.53 | +7: iteration 17420/ 21553 | consumed samples: 4459520 | consumed tokens: 9133096960 | elapsed time per iteration (s): 0.31 | learning rate: 3.616E-05 | global batch size: 256 | lm loss: 2.528038E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.744 | TFLOPs: 29.36 | +7: iteration 17430/ 21553 | consumed samples: 4462080 | consumed tokens: 9138339840 | elapsed time per iteration (s): 0.31 | learning rate: 3.608E-05 | global batch size: 256 | lm loss: 2.548055E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.326 | TFLOPs: 29.03 | +7: iteration 17440/ 21553 | consumed samples: 4464640 | consumed tokens: 9143582720 | elapsed time per iteration (s): 0.30 | learning rate: 3.600E-05 | global batch size: 256 | lm loss: 2.512047E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.868 | TFLOPs: 29.79 | +7: iteration 17450/ 21553 | consumed samples: 4467200 | consumed tokens: 9148825600 | elapsed time per iteration (s): 0.31 | learning rate: 3.593E-05 | global batch size: 256 | lm loss: 2.541992E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.015 | TFLOPs: 28.67 | +7: iteration 17460/ 21553 | consumed samples: 4469760 | consumed tokens: 9154068480 | elapsed time per iteration (s): 0.31 | learning rate: 3.585E-05 | global batch size: 256 | lm loss: 2.549852E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.902 | TFLOPs: 29.30 | +7: iteration 17470/ 21553 | consumed samples: 4472320 | consumed tokens: 9159311360 | elapsed time per iteration (s): 0.30 | learning rate: 3.578E-05 | global batch size: 256 | lm loss: 2.497304E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.884 | TFLOPs: 29.58 | +7: iteration 17480/ 21553 | consumed samples: 4474880 | consumed tokens: 9164554240 | elapsed time per iteration (s): 0.30 | learning rate: 3.570E-05 | global batch size: 256 | lm loss: 2.535476E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.531 | TFLOPs: 29.70 | +7: iteration 17490/ 21553 | consumed samples: 4477440 | consumed tokens: 9169797120 | elapsed time per iteration (s): 0.31 | learning rate: 3.563E-05 | global batch size: 256 | lm loss: 2.555303E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.136 | TFLOPs: 29.20 | +7: iteration 17500/ 21553 | consumed samples: 4480000 | consumed tokens: 9175040000 | elapsed time per iteration (s): 0.31 | learning rate: 3.555E-05 | global batch size: 256 | lm loss: 2.508535E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.402 | TFLOPs: 28.79 | +7: iteration 17510/ 21553 | consumed samples: 4482560 | consumed tokens: 9180282880 | elapsed time per iteration (s): 0.31 | learning rate: 3.548E-05 | global batch size: 256 | lm loss: 2.507566E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.597 | TFLOPs: 29.18 | +7: iteration 17520/ 21553 | consumed samples: 4485120 | consumed tokens: 9185525760 | elapsed time per iteration (s): 0.30 | learning rate: 3.541E-05 | global batch size: 256 | lm loss: 2.532728E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.702 | TFLOPs: 29.40 | +7: iteration 17530/ 21553 | consumed samples: 4487680 | consumed tokens: 9190768640 | elapsed time per iteration (s): 0.30 | learning rate: 3.533E-05 | global batch size: 256 | lm loss: 2.563975E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.359 | TFLOPs: 29.49 | +7: iteration 17540/ 21553 | consumed samples: 4490240 | consumed tokens: 9196011520 | elapsed time per iteration (s): 0.31 | learning rate: 3.526E-05 | global batch size: 256 | lm loss: 2.568526E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.415 | TFLOPs: 28.58 | +7: iteration 17550/ 21553 | consumed samples: 4492800 | consumed tokens: 9201254400 | elapsed time per iteration (s): 0.30 | learning rate: 3.518E-05 | global batch size: 256 | lm loss: 2.539071E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.857 | TFLOPs: 29.79 | +7: iteration 17560/ 21553 | consumed samples: 4495360 | consumed tokens: 9206497280 | elapsed time per iteration (s): 0.31 | learning rate: 3.511E-05 | global batch size: 256 | lm loss: 2.515165E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.845 | TFLOPs: 29.23 | +7: iteration 17570/ 21553 | consumed samples: 4497920 | consumed tokens: 9211740160 | elapsed time per iteration (s): 0.30 | learning rate: 3.504E-05 | global batch size: 256 | lm loss: 2.516895E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.925 | TFLOPs: 29.54 | +7: iteration 17580/ 21553 | consumed samples: 4500480 | consumed tokens: 9216983040 | elapsed time per iteration (s): 0.31 | learning rate: 3.496E-05 | global batch size: 256 | lm loss: 2.546343E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.462 | TFLOPs: 29.04 | +7: iteration 17590/ 21553 | consumed samples: 4503040 | consumed tokens: 9222225920 | elapsed time per iteration (s): 0.30 | learning rate: 3.489E-05 | global batch size: 256 | lm loss: 2.511759E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.965 | TFLOPs: 29.51 | +7: iteration 17600/ 21553 | consumed samples: 4505600 | consumed tokens: 9227468800 | elapsed time per iteration (s): 0.30 | learning rate: 3.482E-05 | global batch size: 256 | lm loss: 2.550349E+00 | grad norm: 0.264 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.882 | TFLOPs: 29.65 | +7: iteration 17610/ 21553 | consumed samples: 4508160 | consumed tokens: 9232711680 | elapsed time per iteration (s): 0.31 | learning rate: 3.475E-05 | global batch size: 256 | lm loss: 2.533771E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.811 | TFLOPs: 29.12 | +7: iteration 17620/ 21553 | consumed samples: 4510720 | consumed tokens: 9237954560 | elapsed time per iteration (s): 0.30 | learning rate: 3.467E-05 | global batch size: 256 | lm loss: 2.545696E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.446 | TFLOPs: 29.53 | +7: iteration 17630/ 21553 | consumed samples: 4513280 | consumed tokens: 9243197440 | elapsed time per iteration (s): 0.31 | learning rate: 3.460E-05 | global batch size: 256 | lm loss: 2.516108E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.217 | TFLOPs: 28.47 | +7: iteration 17640/ 21553 | consumed samples: 4515840 | consumed tokens: 9248440320 | elapsed time per iteration (s): 0.31 | learning rate: 3.453E-05 | global batch size: 256 | lm loss: 2.511393E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.256 | TFLOPs: 28.54 | +7: iteration 17650/ 21553 | consumed samples: 4518400 | consumed tokens: 9253683200 | elapsed time per iteration (s): 0.31 | learning rate: 3.446E-05 | global batch size: 256 | lm loss: 2.541724E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.071 | TFLOPs: 28.46 | +7: iteration 17660/ 21553 | consumed samples: 4520960 | consumed tokens: 9258926080 | elapsed time per iteration (s): 0.30 | learning rate: 3.438E-05 | global batch size: 256 | lm loss: 2.544846E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.737 | TFLOPs: 29.61 | +7: iteration 17670/ 21553 | consumed samples: 4523520 | consumed tokens: 9264168960 | elapsed time per iteration (s): 0.31 | learning rate: 3.431E-05 | global batch size: 256 | lm loss: 2.491376E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.530 | TFLOPs: 29.25 | +7: iteration 17680/ 21553 | consumed samples: 4526080 | consumed tokens: 9269411840 | elapsed time per iteration (s): 0.31 | learning rate: 3.424E-05 | global batch size: 256 | lm loss: 2.527333E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.078 | TFLOPs: 29.34 | +7: iteration 17690/ 21553 | consumed samples: 4528640 | consumed tokens: 9274654720 | elapsed time per iteration (s): 0.31 | learning rate: 3.417E-05 | global batch size: 256 | lm loss: 2.562955E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.064 | TFLOPs: 29.16 | +7: iteration 17700/ 21553 | consumed samples: 4531200 | consumed tokens: 9279897600 | elapsed time per iteration (s): 0.30 | learning rate: 3.410E-05 | global batch size: 256 | lm loss: 2.535500E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.725 | TFLOPs: 29.64 | +7: iteration 17710/ 21553 | consumed samples: 4533760 | consumed tokens: 9285140480 | elapsed time per iteration (s): 0.30 | learning rate: 3.403E-05 | global batch size: 256 | lm loss: 2.529117E+00 | grad norm: 0.308 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.840 | TFLOPs: 29.61 | +7: iteration 17720/ 21553 | consumed samples: 4536320 | consumed tokens: 9290383360 | elapsed time per iteration (s): 0.31 | learning rate: 3.396E-05 | global batch size: 256 | lm loss: 2.532703E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.883 | TFLOPs: 28.81 | +7: iteration 17730/ 21553 | consumed samples: 4538880 | consumed tokens: 9295626240 | elapsed time per iteration (s): 0.31 | learning rate: 3.388E-05 | global batch size: 256 | lm loss: 2.488527E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.308 | TFLOPs: 29.24 | +7: iteration 17740/ 21553 | consumed samples: 4541440 | consumed tokens: 9300869120 | elapsed time per iteration (s): 0.30 | learning rate: 3.381E-05 | global batch size: 256 | lm loss: 2.530852E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.491 | TFLOPs: 29.56 | +7: iteration 17750/ 21553 | consumed samples: 4544000 | consumed tokens: 9306112000 | elapsed time per iteration (s): 0.30 | learning rate: 3.374E-05 | global batch size: 256 | lm loss: 2.555175E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.512 | TFLOPs: 29.98 | +7: iteration 17760/ 21553 | consumed samples: 4546560 | consumed tokens: 9311354880 | elapsed time per iteration (s): 0.30 | learning rate: 3.367E-05 | global batch size: 256 | lm loss: 2.554718E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.016 | TFLOPs: 29.72 | +7: iteration 17770/ 21553 | consumed samples: 4549120 | consumed tokens: 9316597760 | elapsed time per iteration (s): 0.30 | learning rate: 3.360E-05 | global batch size: 256 | lm loss: 2.548379E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.463 | TFLOPs: 29.70 | +7: iteration 17780/ 21553 | consumed samples: 4551680 | consumed tokens: 9321840640 | elapsed time per iteration (s): 0.31 | learning rate: 3.353E-05 | global batch size: 256 | lm loss: 2.544871E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.299 | TFLOPs: 29.17 | +7: iteration 17790/ 21553 | consumed samples: 4554240 | consumed tokens: 9327083520 | elapsed time per iteration (s): 0.30 | learning rate: 3.346E-05 | global batch size: 256 | lm loss: 2.506944E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.410 | TFLOPs: 29.70 | +7: iteration 17800/ 21553 | consumed samples: 4556800 | consumed tokens: 9332326400 | elapsed time per iteration (s): 0.31 | learning rate: 3.339E-05 | global batch size: 256 | lm loss: 2.541852E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.489 | TFLOPs: 29.00 | +7: iteration 17810/ 21553 | consumed samples: 4559360 | consumed tokens: 9337569280 | elapsed time per iteration (s): 0.31 | learning rate: 3.332E-05 | global batch size: 256 | lm loss: 2.539757E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.237 | TFLOPs: 29.24 | +7: iteration 17820/ 21553 | consumed samples: 4561920 | consumed tokens: 9342812160 | elapsed time per iteration (s): 0.31 | learning rate: 3.326E-05 | global batch size: 256 | lm loss: 2.528741E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.796 | TFLOPs: 29.15 | +7: iteration 17830/ 21553 | consumed samples: 4564480 | consumed tokens: 9348055040 | elapsed time per iteration (s): 0.30 | learning rate: 3.319E-05 | global batch size: 256 | lm loss: 2.545308E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.361 | TFLOPs: 29.49 | +7: iteration 17840/ 21553 | consumed samples: 4567040 | consumed tokens: 9353297920 | elapsed time per iteration (s): 0.31 | learning rate: 3.312E-05 | global batch size: 256 | lm loss: 2.523811E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.328 | TFLOPs: 28.47 | +7: iteration 17850/ 21553 | consumed samples: 4569600 | consumed tokens: 9358540800 | elapsed time per iteration (s): 0.30 | learning rate: 3.305E-05 | global batch size: 256 | lm loss: 2.524194E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.466 | TFLOPs: 29.67 | +7: iteration 17860/ 21553 | consumed samples: 4572160 | consumed tokens: 9363783680 | elapsed time per iteration (s): 0.30 | learning rate: 3.298E-05 | global batch size: 256 | lm loss: 2.514433E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.857 | TFLOPs: 29.54 | +7: iteration 17870/ 21553 | consumed samples: 4574720 | consumed tokens: 9369026560 | elapsed time per iteration (s): 0.30 | learning rate: 3.291E-05 | global batch size: 256 | lm loss: 2.541620E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.290 | TFLOPs: 29.80 | +7: iteration 17880/ 21553 | consumed samples: 4577280 | consumed tokens: 9374269440 | elapsed time per iteration (s): 0.30 | learning rate: 3.284E-05 | global batch size: 256 | lm loss: 2.538600E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.499 | TFLOPs: 29.56 | +7: iteration 17890/ 21553 | consumed samples: 4579840 | consumed tokens: 9379512320 | elapsed time per iteration (s): 0.31 | learning rate: 3.277E-05 | global batch size: 256 | lm loss: 2.508804E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.378 | TFLOPs: 28.72 | +7: iteration 17900/ 21553 | consumed samples: 4582400 | consumed tokens: 9384755200 | elapsed time per iteration (s): 0.30 | learning rate: 3.271E-05 | global batch size: 256 | lm loss: 2.572915E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.745 | TFLOPs: 29.50 | +7: iteration 17910/ 21553 | consumed samples: 4584960 | consumed tokens: 9389998080 | elapsed time per iteration (s): 0.30 | learning rate: 3.264E-05 | global batch size: 256 | lm loss: 2.478304E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.233 | TFLOPs: 29.73 | +7: iteration 17920/ 21553 | consumed samples: 4587520 | consumed tokens: 9395240960 | elapsed time per iteration (s): 0.30 | learning rate: 3.257E-05 | global batch size: 256 | lm loss: 2.523698E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.558 | TFLOPs: 29.71 | +7: iteration 17930/ 21553 | consumed samples: 4590080 | consumed tokens: 9400483840 | elapsed time per iteration (s): 0.30 | learning rate: 3.250E-05 | global batch size: 256 | lm loss: 2.546891E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.664 | TFLOPs: 29.39 | +7: iteration 17940/ 21553 | consumed samples: 4592640 | consumed tokens: 9405726720 | elapsed time per iteration (s): 0.30 | learning rate: 3.244E-05 | global batch size: 256 | lm loss: 2.531878E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.817 | TFLOPs: 29.50 | +7: iteration 17950/ 21553 | consumed samples: 4595200 | consumed tokens: 9410969600 | elapsed time per iteration (s): 0.30 | learning rate: 3.237E-05 | global batch size: 256 | lm loss: 2.540137E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.014 | TFLOPs: 29.55 | +7: iteration 17960/ 21553 | consumed samples: 4597760 | consumed tokens: 9416212480 | elapsed time per iteration (s): 0.30 | learning rate: 3.230E-05 | global batch size: 256 | lm loss: 2.515243E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.822 | TFLOPs: 29.85 | +7: iteration 17970/ 21553 | consumed samples: 4600320 | consumed tokens: 9421455360 | elapsed time per iteration (s): 0.30 | learning rate: 3.224E-05 | global batch size: 256 | lm loss: 2.521425E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.783 | TFLOPs: 29.43 | +7: iteration 17980/ 21553 | consumed samples: 4602880 | consumed tokens: 9426698240 | elapsed time per iteration (s): 0.30 | learning rate: 3.217E-05 | global batch size: 256 | lm loss: 2.546159E+00 | grad norm: 0.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.233 | TFLOPs: 29.45 | +7: iteration 17990/ 21553 | consumed samples: 4605440 | consumed tokens: 9431941120 | elapsed time per iteration (s): 0.30 | learning rate: 3.210E-05 | global batch size: 256 | lm loss: 2.529756E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.096 | TFLOPs: 29.58 | +0: [2022-11-25 19:04:45,430] [INFO] [logging.py:68:log_dist] [Rank 0] step=18000, skipped=0, lr=[3.2036439682204886e-05, 3.2036439682204886e-05, 3.2036439682204886e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 18000/ 21553 | consumed samples: 4608000 | consumed tokens: 9437184000 | elapsed time per iteration (s): 0.31 | learning rate: 3.204E-05 | global batch size: 256 | lm loss: 2.535496E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.076 | TFLOPs: 28.74 | +0: steps: 18000 loss: 2.5000 iter time (s): 0.305 samples/sec: 840.471 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 18000 | lm loss value: 2.356913E+00 | lm loss PPL: 1.055831E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 18000 to checkpoints_146m +0: [2022-11-25 19:04:45,559] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step18000 is begin to save! +0: [2022-11-25 19:04:45,562] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_01-model_00-model_states.pt... +0: [2022-11-25 19:04:45,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_01-model_00-model_states.pt. +0: [2022-11-25 19:04:45,702] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_03-model_00-model_states.pt... +0: [2022-11-25 19:04:45,724] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_03-model_00-model_states.pt. +0: [2022-11-25 19:04:45,724] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_04-model_00-model_states.pt... +0: [2022-11-25 19:04:45,745] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_04-model_00-model_states.pt. +0: [2022-11-25 19:04:45,745] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_05-model_00-model_states.pt... +0: [2022-11-25 19:04:45,767] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_05-model_00-model_states.pt. +0: [2022-11-25 19:04:45,767] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_06-model_00-model_states.pt... +0: [2022-11-25 19:04:45,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_06-model_00-model_states.pt. +0: [2022-11-25 19:04:45,789] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_07-model_00-model_states.pt... +0: [2022-11-25 19:04:45,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_07-model_00-model_states.pt. +0: [2022-11-25 19:04:45,810] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_08-model_00-model_states.pt... +0: [2022-11-25 19:04:45,832] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_08-model_00-model_states.pt. +0: [2022-11-25 19:04:45,832] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_09-model_00-model_states.pt... +0: [2022-11-25 19:04:45,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_09-model_00-model_states.pt. +0: [2022-11-25 19:04:45,853] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_10-model_00-model_states.pt... +0: [2022-11-25 19:04:45,875] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_10-model_00-model_states.pt. +0: [2022-11-25 19:04:45,875] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_11-model_00-model_states.pt... +0: [2022-11-25 19:04:45,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_11-model_00-model_states.pt. +0: [2022-11-25 19:04:45,896] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_12-model_00-model_states.pt... +0: [2022-11-25 19:04:45,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_12-model_00-model_states.pt. +0: [2022-11-25 19:04:45,911] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_13-model_00-model_states.pt... +0: [2022-11-25 19:04:45,926] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_13-model_00-model_states.pt. +0: [2022-11-25 19:04:45,926] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_14-model_00-model_states.pt... +0: [2022-11-25 19:04:45,941] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_14-model_00-model_states.pt. +0: [2022-11-25 19:04:45,942] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_15-model_00-model_states.pt... +0: [2022-11-25 19:04:45,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_15-model_00-model_states.pt. +0: [2022-11-25 19:04:45,957] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_16-model_00-model_states.pt... +0: [2022-11-25 19:04:45,971] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_16-model_00-model_states.pt. +0: [2022-11-25 19:04:45,972] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_17-model_00-model_states.pt... +0: [2022-11-25 19:04:45,986] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_17-model_00-model_states.pt. +0: [2022-11-25 19:04:45,987] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/layer_19-model_00-model_states.pt... +0: [2022-11-25 19:04:45,987] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/layer_19-model_00-model_states.pt. +0: [2022-11-25 19:04:45,988] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step18000/mp_rank_00_model_states.pt +0: [2022-11-25 19:04:45,988] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/mp_rank_00_model_states.pt... +0: [2022-11-25 19:04:45,990] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/mp_rank_00_model_states.pt. +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:04:46,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:04:46,040] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,041] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,041] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2022-11-25 19:04:46,041] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,041] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,041] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,043] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,043] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,043] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,043] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,043] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,043] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,044] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,044] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,044] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,043] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,042] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,042] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,044] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,042] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,042] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,044] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,042] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,042] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,044] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,044] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,044] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:04:46,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2022-11-25 19:04:46,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,045] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,045] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:04:46,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 19:04:46,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2022-11-25 19:04:46,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:04:46,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 19:04:46,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:04:46,053] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,053] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,053] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +4: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2022-11-25 19:04:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:04:46,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 19:04:46,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2022-11-25 19:04:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:04:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 19:04:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,063] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,063] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:04:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 19:04:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2022-11-25 19:04:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2022-11-25 19:04:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:04:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 19:04:46,066] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2022-11-25 19:04:46,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 19:04:46,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: successfully saved checkpoint at iteration 18000 to checkpoints_146m +7: time (ms) | save-checkpoint: 520.29 +7: iteration 18010/ 21553 | consumed samples: 4610560 | consumed tokens: 9442426880 | elapsed time per iteration (s): 0.37 | learning rate: 3.197E-05 | global batch size: 256 | lm loss: 2.549757E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 699.203 | TFLOPs: 24.48 | +7: iteration 18020/ 21553 | consumed samples: 4613120 | consumed tokens: 9447669760 | elapsed time per iteration (s): 0.30 | learning rate: 3.190E-05 | global batch size: 256 | lm loss: 2.538001E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.791 | TFLOPs: 29.50 | +7: iteration 18030/ 21553 | consumed samples: 4615680 | consumed tokens: 9452912640 | elapsed time per iteration (s): 0.31 | learning rate: 3.184E-05 | global batch size: 256 | lm loss: 2.508065E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.414 | TFLOPs: 29.07 | +7: iteration 18040/ 21553 | consumed samples: 4618240 | consumed tokens: 9458155520 | elapsed time per iteration (s): 0.31 | learning rate: 3.177E-05 | global batch size: 256 | lm loss: 2.531267E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.970 | TFLOPs: 29.26 | +7: iteration 18050/ 21553 | consumed samples: 4620800 | consumed tokens: 9463398400 | elapsed time per iteration (s): 0.31 | learning rate: 3.171E-05 | global batch size: 256 | lm loss: 2.531486E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.025 | TFLOPs: 29.23 | +7: iteration 18060/ 21553 | consumed samples: 4623360 | consumed tokens: 9468641280 | elapsed time per iteration (s): 0.30 | learning rate: 3.164E-05 | global batch size: 256 | lm loss: 2.504081E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.886 | TFLOPs: 29.68 | +7: iteration 18070/ 21553 | consumed samples: 4625920 | consumed tokens: 9473884160 | elapsed time per iteration (s): 0.30 | learning rate: 3.158E-05 | global batch size: 256 | lm loss: 2.545333E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.222 | TFLOPs: 29.87 | +7: iteration 18080/ 21553 | consumed samples: 4628480 | consumed tokens: 9479127040 | elapsed time per iteration (s): 0.31 | learning rate: 3.151E-05 | global batch size: 256 | lm loss: 2.512323E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.640 | TFLOPs: 29.36 | +7: iteration 18090/ 21553 | consumed samples: 4631040 | consumed tokens: 9484369920 | elapsed time per iteration (s): 0.30 | learning rate: 3.145E-05 | global batch size: 256 | lm loss: 2.543336E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.304 | TFLOPs: 29.80 | +7: iteration 18100/ 21553 | consumed samples: 4633600 | consumed tokens: 9489612800 | elapsed time per iteration (s): 0.31 | learning rate: 3.138E-05 | global batch size: 256 | lm loss: 2.527475E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.617 | TFLOPs: 29.29 | +7: iteration 18110/ 21553 | consumed samples: 4636160 | consumed tokens: 9494855680 | elapsed time per iteration (s): 0.30 | learning rate: 3.132E-05 | global batch size: 256 | lm loss: 2.557806E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.475 | TFLOPs: 29.67 | +7: iteration 18120/ 21553 | consumed samples: 4638720 | consumed tokens: 9500098560 | elapsed time per iteration (s): 0.30 | learning rate: 3.125E-05 | global batch size: 256 | lm loss: 2.558797E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.596 | TFLOPs: 29.67 | +7: iteration 18130/ 21553 | consumed samples: 4641280 | consumed tokens: 9505341440 | elapsed time per iteration (s): 0.31 | learning rate: 3.119E-05 | global batch size: 256 | lm loss: 2.545628E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.903 | TFLOPs: 29.30 | +7: iteration 18140/ 21553 | consumed samples: 4643840 | consumed tokens: 9510584320 | elapsed time per iteration (s): 0.31 | learning rate: 3.113E-05 | global batch size: 256 | lm loss: 2.508401E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.365 | TFLOPs: 28.68 | +7: iteration 18150/ 21553 | consumed samples: 4646400 | consumed tokens: 9515827200 | elapsed time per iteration (s): 0.32 | learning rate: 3.106E-05 | global batch size: 256 | lm loss: 2.498116E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.278 | TFLOPs: 28.44 | +7: iteration 18160/ 21553 | consumed samples: 4648960 | consumed tokens: 9521070080 | elapsed time per iteration (s): 0.31 | learning rate: 3.100E-05 | global batch size: 256 | lm loss: 2.485026E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.415 | TFLOPs: 28.55 | +7: iteration 18170/ 21553 | consumed samples: 4651520 | consumed tokens: 9526312960 | elapsed time per iteration (s): 0.30 | learning rate: 3.094E-05 | global batch size: 256 | lm loss: 2.506595E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.240 | TFLOPs: 29.80 | +7: iteration 18180/ 21553 | consumed samples: 4654080 | consumed tokens: 9531555840 | elapsed time per iteration (s): 0.30 | learning rate: 3.087E-05 | global batch size: 256 | lm loss: 2.513474E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.329 | TFLOPs: 29.73 | +7: iteration 18190/ 21553 | consumed samples: 4656640 | consumed tokens: 9536798720 | elapsed time per iteration (s): 0.30 | learning rate: 3.081E-05 | global batch size: 256 | lm loss: 2.513542E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.715 | TFLOPs: 29.82 | +7: iteration 18200/ 21553 | consumed samples: 4659200 | consumed tokens: 9542041600 | elapsed time per iteration (s): 0.30 | learning rate: 3.075E-05 | global batch size: 256 | lm loss: 2.547582E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.230 | TFLOPs: 29.66 | +7: iteration 18210/ 21553 | consumed samples: 4661760 | consumed tokens: 9547284480 | elapsed time per iteration (s): 0.31 | learning rate: 3.068E-05 | global batch size: 256 | lm loss: 2.489820E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.967 | TFLOPs: 28.98 | +7: iteration 18220/ 21553 | consumed samples: 4664320 | consumed tokens: 9552527360 | elapsed time per iteration (s): 0.31 | learning rate: 3.062E-05 | global batch size: 256 | lm loss: 2.556626E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.584 | TFLOPs: 29.32 | +7: iteration 18230/ 21553 | consumed samples: 4666880 | consumed tokens: 9557770240 | elapsed time per iteration (s): 0.31 | learning rate: 3.056E-05 | global batch size: 256 | lm loss: 2.509959E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.348 | TFLOPs: 29.21 | +7: iteration 18240/ 21553 | consumed samples: 4669440 | consumed tokens: 9563013120 | elapsed time per iteration (s): 0.31 | learning rate: 3.050E-05 | global batch size: 256 | lm loss: 2.503845E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.168 | TFLOPs: 29.34 | +7: iteration 18250/ 21553 | consumed samples: 4672000 | consumed tokens: 9568256000 | elapsed time per iteration (s): 0.31 | learning rate: 3.043E-05 | global batch size: 256 | lm loss: 2.528546E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.268 | TFLOPs: 29.38 | +7: iteration 18260/ 21553 | consumed samples: 4674560 | consumed tokens: 9573498880 | elapsed time per iteration (s): 0.30 | learning rate: 3.037E-05 | global batch size: 256 | lm loss: 2.488095E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.973 | TFLOPs: 29.79 | +7: iteration 18270/ 21553 | consumed samples: 4677120 | consumed tokens: 9578741760 | elapsed time per iteration (s): 0.31 | learning rate: 3.031E-05 | global batch size: 256 | lm loss: 2.510667E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.200 | TFLOPs: 29.38 | +7: iteration 18280/ 21553 | consumed samples: 4679680 | consumed tokens: 9583984640 | elapsed time per iteration (s): 0.31 | learning rate: 3.025E-05 | global batch size: 256 | lm loss: 2.541334E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.671 | TFLOPs: 29.36 | +7: iteration 18290/ 21553 | consumed samples: 4682240 | consumed tokens: 9589227520 | elapsed time per iteration (s): 0.31 | learning rate: 3.019E-05 | global batch size: 256 | lm loss: 2.523886E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.537 | TFLOPs: 29.35 | +7: iteration 18300/ 21553 | consumed samples: 4684800 | consumed tokens: 9594470400 | elapsed time per iteration (s): 0.31 | learning rate: 3.013E-05 | global batch size: 256 | lm loss: 2.547338E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.791 | TFLOPs: 28.94 | +7: iteration 18310/ 21553 | consumed samples: 4687360 | consumed tokens: 9599713280 | elapsed time per iteration (s): 0.31 | learning rate: 3.007E-05 | global batch size: 256 | lm loss: 2.558762E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.815 | TFLOPs: 28.94 | +7: iteration 18320/ 21553 | consumed samples: 4689920 | consumed tokens: 9604956160 | elapsed time per iteration (s): 0.30 | learning rate: 3.001E-05 | global batch size: 256 | lm loss: 2.504689E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.787 | TFLOPs: 29.75 | +7: iteration 18330/ 21553 | consumed samples: 4692480 | consumed tokens: 9610199040 | elapsed time per iteration (s): 0.32 | learning rate: 2.994E-05 | global batch size: 256 | lm loss: 2.527962E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 812.413 | TFLOPs: 28.44 | +7: iteration 18340/ 21553 | consumed samples: 4695040 | consumed tokens: 9615441920 | elapsed time per iteration (s): 0.31 | learning rate: 2.988E-05 | global batch size: 256 | lm loss: 2.528691E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.066 | TFLOPs: 29.02 | +7: iteration 18350/ 21553 | consumed samples: 4697600 | consumed tokens: 9620684800 | elapsed time per iteration (s): 0.31 | learning rate: 2.982E-05 | global batch size: 256 | lm loss: 2.535941E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.492 | TFLOPs: 29.32 | +7: iteration 18360/ 21553 | consumed samples: 4700160 | consumed tokens: 9625927680 | elapsed time per iteration (s): 0.30 | learning rate: 2.976E-05 | global batch size: 256 | lm loss: 2.499235E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.856 | TFLOPs: 29.58 | +7: iteration 18370/ 21553 | consumed samples: 4702720 | consumed tokens: 9631170560 | elapsed time per iteration (s): 0.31 | learning rate: 2.970E-05 | global batch size: 256 | lm loss: 2.561516E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.218 | TFLOPs: 28.99 | +7: iteration 18380/ 21553 | consumed samples: 4705280 | consumed tokens: 9636413440 | elapsed time per iteration (s): 0.31 | learning rate: 2.964E-05 | global batch size: 256 | lm loss: 2.529496E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.491 | TFLOPs: 28.83 | +7: iteration 18390/ 21553 | consumed samples: 4707840 | consumed tokens: 9641656320 | elapsed time per iteration (s): 0.31 | learning rate: 2.958E-05 | global batch size: 256 | lm loss: 2.548573E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.163 | TFLOPs: 29.34 | +7: iteration 18400/ 21553 | consumed samples: 4710400 | consumed tokens: 9646899200 | elapsed time per iteration (s): 0.31 | learning rate: 2.953E-05 | global batch size: 256 | lm loss: 2.519832E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.087 | TFLOPs: 29.37 | +7: iteration 18410/ 21553 | consumed samples: 4712960 | consumed tokens: 9652142080 | elapsed time per iteration (s): 0.31 | learning rate: 2.947E-05 | global batch size: 256 | lm loss: 2.494648E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.760 | TFLOPs: 28.59 | +7: iteration 18420/ 21553 | consumed samples: 4715520 | consumed tokens: 9657384960 | elapsed time per iteration (s): 0.31 | learning rate: 2.941E-05 | global batch size: 256 | lm loss: 2.523516E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.228 | TFLOPs: 29.27 | +7: iteration 18430/ 21553 | consumed samples: 4718080 | consumed tokens: 9662627840 | elapsed time per iteration (s): 0.30 | learning rate: 2.935E-05 | global batch size: 256 | lm loss: 2.539376E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.960 | TFLOPs: 29.44 | +7: iteration 18440/ 21553 | consumed samples: 4720640 | consumed tokens: 9667870720 | elapsed time per iteration (s): 0.30 | learning rate: 2.929E-05 | global batch size: 256 | lm loss: 2.548992E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.104 | TFLOPs: 29.97 | +7: iteration 18450/ 21553 | consumed samples: 4723200 | consumed tokens: 9673113600 | elapsed time per iteration (s): 0.31 | learning rate: 2.923E-05 | global batch size: 256 | lm loss: 2.507101E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.957 | TFLOPs: 29.05 | +7: iteration 18460/ 21553 | consumed samples: 4725760 | consumed tokens: 9678356480 | elapsed time per iteration (s): 0.30 | learning rate: 2.917E-05 | global batch size: 256 | lm loss: 2.497760E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.119 | TFLOPs: 29.62 | +7: iteration 18470/ 21553 | consumed samples: 4728320 | consumed tokens: 9683599360 | elapsed time per iteration (s): 0.30 | learning rate: 2.911E-05 | global batch size: 256 | lm loss: 2.515434E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.780 | TFLOPs: 29.82 | +7: iteration 18480/ 21553 | consumed samples: 4730880 | consumed tokens: 9688842240 | elapsed time per iteration (s): 0.31 | learning rate: 2.906E-05 | global batch size: 256 | lm loss: 2.506378E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.338 | TFLOPs: 29.24 | +7: iteration 18490/ 21553 | consumed samples: 4733440 | consumed tokens: 9694085120 | elapsed time per iteration (s): 0.30 | learning rate: 2.900E-05 | global batch size: 256 | lm loss: 2.519620E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.671 | TFLOPs: 29.53 | +7: iteration 18500/ 21553 | consumed samples: 4736000 | consumed tokens: 9699328000 | elapsed time per iteration (s): 0.30 | learning rate: 2.894E-05 | global batch size: 256 | lm loss: 2.530023E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.021 | TFLOPs: 29.90 | +7: iteration 18510/ 21553 | consumed samples: 4738560 | consumed tokens: 9704570880 | elapsed time per iteration (s): 0.31 | learning rate: 2.888E-05 | global batch size: 256 | lm loss: 2.519214E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.649 | TFLOPs: 29.32 | +7: iteration 18520/ 21553 | consumed samples: 4741120 | consumed tokens: 9709813760 | elapsed time per iteration (s): 0.30 | learning rate: 2.883E-05 | global batch size: 256 | lm loss: 2.493200E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.452 | TFLOPs: 29.81 | +7: iteration 18530/ 21553 | consumed samples: 4743680 | consumed tokens: 9715056640 | elapsed time per iteration (s): 0.30 | learning rate: 2.877E-05 | global batch size: 256 | lm loss: 2.525378E+00 | grad norm: 0.261 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.377 | TFLOPs: 29.56 | +7: iteration 18540/ 21553 | consumed samples: 4746240 | consumed tokens: 9720299520 | elapsed time per iteration (s): 0.32 | learning rate: 2.871E-05 | global batch size: 256 | lm loss: 2.539714E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.003 | TFLOPs: 28.36 | +7: iteration 18550/ 21553 | consumed samples: 4748800 | consumed tokens: 9725542400 | elapsed time per iteration (s): 0.30 | learning rate: 2.865E-05 | global batch size: 256 | lm loss: 2.541918E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.424 | TFLOPs: 29.70 | +7: iteration 18560/ 21553 | consumed samples: 4751360 | consumed tokens: 9730785280 | elapsed time per iteration (s): 0.30 | learning rate: 2.860E-05 | global batch size: 256 | lm loss: 2.535485E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.882 | TFLOPs: 29.58 | +7: iteration 18570/ 21553 | consumed samples: 4753920 | consumed tokens: 9736028160 | elapsed time per iteration (s): 0.30 | learning rate: 2.854E-05 | global batch size: 256 | lm loss: 2.498464E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.455 | TFLOPs: 29.91 | +7: iteration 18580/ 21553 | consumed samples: 4756480 | consumed tokens: 9741271040 | elapsed time per iteration (s): 0.30 | learning rate: 2.849E-05 | global batch size: 256 | lm loss: 2.542530E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.653 | TFLOPs: 29.88 | +7: iteration 18590/ 21553 | consumed samples: 4759040 | consumed tokens: 9746513920 | elapsed time per iteration (s): 0.30 | learning rate: 2.843E-05 | global batch size: 256 | lm loss: 2.511047E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.529 | TFLOPs: 29.49 | +7: iteration 18600/ 21553 | consumed samples: 4761600 | consumed tokens: 9751756800 | elapsed time per iteration (s): 0.30 | learning rate: 2.837E-05 | global batch size: 256 | lm loss: 2.513887E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.566 | TFLOPs: 29.43 | +7: iteration 18610/ 21553 | consumed samples: 4764160 | consumed tokens: 9756999680 | elapsed time per iteration (s): 0.30 | learning rate: 2.832E-05 | global batch size: 256 | lm loss: 2.534588E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.116 | TFLOPs: 29.59 | +7: iteration 18620/ 21553 | consumed samples: 4766720 | consumed tokens: 9762242560 | elapsed time per iteration (s): 0.31 | learning rate: 2.826E-05 | global batch size: 256 | lm loss: 2.525809E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.290 | TFLOPs: 29.35 | +7: iteration 18630/ 21553 | consumed samples: 4769280 | consumed tokens: 9767485440 | elapsed time per iteration (s): 0.31 | learning rate: 2.821E-05 | global batch size: 256 | lm loss: 2.533989E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.380 | TFLOPs: 28.93 | +7: iteration 18640/ 21553 | consumed samples: 4771840 | consumed tokens: 9772728320 | elapsed time per iteration (s): 0.32 | learning rate: 2.815E-05 | global batch size: 256 | lm loss: 2.523492E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 810.552 | TFLOPs: 28.38 | +7: iteration 18650/ 21553 | consumed samples: 4774400 | consumed tokens: 9777971200 | elapsed time per iteration (s): 0.30 | learning rate: 2.810E-05 | global batch size: 256 | lm loss: 2.522523E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.934 | TFLOPs: 29.96 | +7: iteration 18660/ 21553 | consumed samples: 4776960 | consumed tokens: 9783214080 | elapsed time per iteration (s): 0.30 | learning rate: 2.804E-05 | global batch size: 256 | lm loss: 2.546627E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.485 | TFLOPs: 29.95 | +7: iteration 18670/ 21553 | consumed samples: 4779520 | consumed tokens: 9788456960 | elapsed time per iteration (s): 0.30 | learning rate: 2.799E-05 | global batch size: 256 | lm loss: 2.502651E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.463 | TFLOPs: 29.46 | +7: iteration 18680/ 21553 | consumed samples: 4782080 | consumed tokens: 9793699840 | elapsed time per iteration (s): 0.30 | learning rate: 2.793E-05 | global batch size: 256 | lm loss: 2.574278E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.903 | TFLOPs: 29.47 | +7: iteration 18690/ 21553 | consumed samples: 4784640 | consumed tokens: 9798942720 | elapsed time per iteration (s): 0.31 | learning rate: 2.788E-05 | global batch size: 256 | lm loss: 2.520304E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.069 | TFLOPs: 28.92 | +7: iteration 18700/ 21553 | consumed samples: 4787200 | consumed tokens: 9804185600 | elapsed time per iteration (s): 0.31 | learning rate: 2.782E-05 | global batch size: 256 | lm loss: 2.530860E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.451 | TFLOPs: 28.86 | +7: iteration 18710/ 21553 | consumed samples: 4789760 | consumed tokens: 9809428480 | elapsed time per iteration (s): 0.30 | learning rate: 2.777E-05 | global batch size: 256 | lm loss: 2.539957E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.358 | TFLOPs: 29.49 | +7: iteration 18720/ 21553 | consumed samples: 4792320 | consumed tokens: 9814671360 | elapsed time per iteration (s): 0.30 | learning rate: 2.772E-05 | global batch size: 256 | lm loss: 2.526420E+00 | grad norm: 0.296 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.712 | TFLOPs: 29.50 | +7: iteration 18730/ 21553 | consumed samples: 4794880 | consumed tokens: 9819914240 | elapsed time per iteration (s): 0.30 | learning rate: 2.766E-05 | global batch size: 256 | lm loss: 2.523382E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.517 | TFLOPs: 29.60 | +7: iteration 18740/ 21553 | consumed samples: 4797440 | consumed tokens: 9825157120 | elapsed time per iteration (s): 0.31 | learning rate: 2.761E-05 | global batch size: 256 | lm loss: 2.530583E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.552 | TFLOPs: 29.04 | +7: iteration 18750/ 21553 | consumed samples: 4800000 | consumed tokens: 9830400000 | elapsed time per iteration (s): 0.31 | learning rate: 2.756E-05 | global batch size: 256 | lm loss: 2.520280E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.764 | TFLOPs: 29.33 | +7: iteration 18760/ 21553 | consumed samples: 4802560 | consumed tokens: 9835642880 | elapsed time per iteration (s): 0.30 | learning rate: 2.750E-05 | global batch size: 256 | lm loss: 2.543805E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.036 | TFLOPs: 30.00 | +7: iteration 18770/ 21553 | consumed samples: 4805120 | consumed tokens: 9840885760 | elapsed time per iteration (s): 0.30 | learning rate: 2.745E-05 | global batch size: 256 | lm loss: 2.482108E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.947 | TFLOPs: 29.72 | +7: iteration 18780/ 21553 | consumed samples: 4807680 | consumed tokens: 9846128640 | elapsed time per iteration (s): 0.31 | learning rate: 2.740E-05 | global batch size: 256 | lm loss: 2.547210E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.721 | TFLOPs: 29.26 | +7: iteration 18790/ 21553 | consumed samples: 4810240 | consumed tokens: 9851371520 | elapsed time per iteration (s): 0.32 | learning rate: 2.735E-05 | global batch size: 256 | lm loss: 2.522435E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 803.421 | TFLOPs: 28.13 | +7: iteration 18800/ 21553 | consumed samples: 4812800 | consumed tokens: 9856614400 | elapsed time per iteration (s): 0.31 | learning rate: 2.729E-05 | global batch size: 256 | lm loss: 2.528795E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.708 | TFLOPs: 29.01 | +7: iteration 18810/ 21553 | consumed samples: 4815360 | consumed tokens: 9861857280 | elapsed time per iteration (s): 0.31 | learning rate: 2.724E-05 | global batch size: 256 | lm loss: 2.487504E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.462 | TFLOPs: 28.97 | +7: iteration 18820/ 21553 | consumed samples: 4817920 | consumed tokens: 9867100160 | elapsed time per iteration (s): 0.31 | learning rate: 2.719E-05 | global batch size: 256 | lm loss: 2.538121E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.955 | TFLOPs: 29.37 | +7: iteration 18830/ 21553 | consumed samples: 4820480 | consumed tokens: 9872343040 | elapsed time per iteration (s): 0.31 | learning rate: 2.714E-05 | global batch size: 256 | lm loss: 2.529882E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.023 | TFLOPs: 29.20 | +7: iteration 18840/ 21553 | consumed samples: 4823040 | consumed tokens: 9877585920 | elapsed time per iteration (s): 0.31 | learning rate: 2.709E-05 | global batch size: 256 | lm loss: 2.488906E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.123 | TFLOPs: 29.24 | +7: iteration 18850/ 21553 | consumed samples: 4825600 | consumed tokens: 9882828800 | elapsed time per iteration (s): 0.30 | learning rate: 2.703E-05 | global batch size: 256 | lm loss: 2.522414E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.548 | TFLOPs: 29.85 | +7: iteration 18860/ 21553 | consumed samples: 4828160 | consumed tokens: 9888071680 | elapsed time per iteration (s): 0.31 | learning rate: 2.698E-05 | global batch size: 256 | lm loss: 2.518733E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.405 | TFLOPs: 29.00 | +7: iteration 18870/ 21553 | consumed samples: 4830720 | consumed tokens: 9893314560 | elapsed time per iteration (s): 0.30 | learning rate: 2.693E-05 | global batch size: 256 | lm loss: 2.527552E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.397 | TFLOPs: 29.81 | +7: iteration 18880/ 21553 | consumed samples: 4833280 | consumed tokens: 9898557440 | elapsed time per iteration (s): 0.31 | learning rate: 2.688E-05 | global batch size: 256 | lm loss: 2.522685E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.241 | TFLOPs: 28.61 | +7: iteration 18890/ 21553 | consumed samples: 4835840 | consumed tokens: 9903800320 | elapsed time per iteration (s): 0.30 | learning rate: 2.683E-05 | global batch size: 256 | lm loss: 2.543887E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.398 | TFLOPs: 29.42 | +7: iteration 18900/ 21553 | consumed samples: 4838400 | consumed tokens: 9909043200 | elapsed time per iteration (s): 0.30 | learning rate: 2.678E-05 | global batch size: 256 | lm loss: 2.510353E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.265 | TFLOPs: 29.94 | +7: iteration 18910/ 21553 | consumed samples: 4840960 | consumed tokens: 9914286080 | elapsed time per iteration (s): 0.30 | learning rate: 2.673E-05 | global batch size: 256 | lm loss: 2.526708E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.867 | TFLOPs: 29.47 | +7: iteration 18920/ 21553 | consumed samples: 4843520 | consumed tokens: 9919528960 | elapsed time per iteration (s): 0.30 | learning rate: 2.668E-05 | global batch size: 256 | lm loss: 2.500616E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.872 | TFLOPs: 29.72 | +7: iteration 18930/ 21553 | consumed samples: 4846080 | consumed tokens: 9924771840 | elapsed time per iteration (s): 0.30 | learning rate: 2.663E-05 | global batch size: 256 | lm loss: 2.514333E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.436 | TFLOPs: 29.39 | +7: iteration 18940/ 21553 | consumed samples: 4848640 | consumed tokens: 9930014720 | elapsed time per iteration (s): 0.30 | learning rate: 2.658E-05 | global batch size: 256 | lm loss: 2.561167E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.568 | TFLOPs: 29.67 | +7: iteration 18950/ 21553 | consumed samples: 4851200 | consumed tokens: 9935257600 | elapsed time per iteration (s): 0.30 | learning rate: 2.653E-05 | global batch size: 256 | lm loss: 2.518439E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.038 | TFLOPs: 29.97 | +7: iteration 18960/ 21553 | consumed samples: 4853760 | consumed tokens: 9940500480 | elapsed time per iteration (s): 0.30 | learning rate: 2.648E-05 | global batch size: 256 | lm loss: 2.490033E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.851 | TFLOPs: 29.75 | +7: iteration 18970/ 21553 | consumed samples: 4856320 | consumed tokens: 9945743360 | elapsed time per iteration (s): 0.30 | learning rate: 2.643E-05 | global batch size: 256 | lm loss: 2.544547E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.503 | TFLOPs: 29.46 | +7: iteration 18980/ 21553 | consumed samples: 4858880 | consumed tokens: 9950986240 | elapsed time per iteration (s): 0.30 | learning rate: 2.638E-05 | global batch size: 256 | lm loss: 2.543388E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.119 | TFLOPs: 30.08 | +7: iteration 18990/ 21553 | consumed samples: 4861440 | consumed tokens: 9956229120 | elapsed time per iteration (s): 0.30 | learning rate: 2.633E-05 | global batch size: 256 | lm loss: 2.532821E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.932 | TFLOPs: 30.00 | +7: iteration 19000/ 21553 | consumed samples: 4864000 | consumed tokens: 9961472000 | elapsed time per iteration (s): 0.30 | learning rate: 2.628E-05 | global batch size: 256 | lm loss: 2.548034E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.451 | TFLOPs: 29.53 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 19000 | lm loss value: 2.435968E+00 | lm loss PPL: 1.142687E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 19000 to checkpoints_146m +0: [2022-11-25 19:09:51,228] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step19000 is begin to save! +0: [2022-11-25 19:09:51,231] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_01-model_00-model_states.pt... +0: [2022-11-25 19:09:51,372] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_01-model_00-model_states.pt. +0: [2022-11-25 19:09:51,373] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_03-model_00-model_states.pt... +0: [2022-11-25 19:09:51,395] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_03-model_00-model_states.pt. +0: [2022-11-25 19:09:51,395] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_04-model_00-model_states.pt... +0: [2022-11-25 19:09:51,417] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_04-model_00-model_states.pt. +0: [2022-11-25 19:09:51,417] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_05-model_00-model_states.pt... +0: [2022-11-25 19:09:51,438] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_05-model_00-model_states.pt. +0: [2022-11-25 19:09:51,438] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_06-model_00-model_states.pt... +0: [2022-11-25 19:09:51,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_06-model_00-model_states.pt. +0: [2022-11-25 19:09:51,454] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_07-model_00-model_states.pt... +0: [2022-11-25 19:09:51,469] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_07-model_00-model_states.pt. +0: [2022-11-25 19:09:51,470] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_08-model_00-model_states.pt... +0: [2022-11-25 19:09:51,484] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_08-model_00-model_states.pt. +0: [2022-11-25 19:09:51,485] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_09-model_00-model_states.pt... +0: [2022-11-25 19:09:51,499] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_09-model_00-model_states.pt. +0: [2022-11-25 19:09:51,500] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_10-model_00-model_states.pt... +0: [2022-11-25 19:09:51,514] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_10-model_00-model_states.pt. +0: [2022-11-25 19:09:51,515] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_11-model_00-model_states.pt... +0: [2022-11-25 19:09:51,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_11-model_00-model_states.pt. +0: [2022-11-25 19:09:51,530] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_12-model_00-model_states.pt... +0: [2022-11-25 19:09:51,544] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_12-model_00-model_states.pt. +0: [2022-11-25 19:09:51,545] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_13-model_00-model_states.pt... +0: [2022-11-25 19:09:51,566] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_13-model_00-model_states.pt. +0: [2022-11-25 19:09:51,566] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_14-model_00-model_states.pt... +0: [2022-11-25 19:09:51,587] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_14-model_00-model_states.pt. +0: [2022-11-25 19:09:51,587] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_15-model_00-model_states.pt... +0: [2022-11-25 19:09:51,608] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_15-model_00-model_states.pt. +0: [2022-11-25 19:09:51,608] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_16-model_00-model_states.pt... +0: [2022-11-25 19:09:51,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_16-model_00-model_states.pt. +0: [2022-11-25 19:09:51,626] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_17-model_00-model_states.pt... +0: [2022-11-25 19:09:51,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_17-model_00-model_states.pt. +0: [2022-11-25 19:09:51,641] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/layer_19-model_00-model_states.pt... +0: [2022-11-25 19:09:51,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/layer_19-model_00-model_states.pt. +0: [2022-11-25 19:09:51,642] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step19000/mp_rank_00_model_states.pt +0: [2022-11-25 19:09:51,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/mp_rank_00_model_states.pt... +0: [2022-11-25 19:09:51,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/mp_rank_00_model_states.pt. +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +1: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:09:51,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:09:51,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,699] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:09:51,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +6: [2022-11-25 19:09:51,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 19:09:51,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2022-11-25 19:09:51,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:09:51,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2022-11-25 19:09:51,727] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 19:09:51,727] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: successfully saved checkpoint at iteration 19000 to checkpoints_146m +7: time (ms) | save-checkpoint: 503.41 +7: iteration 19010/ 21553 | consumed samples: 4866560 | consumed tokens: 9966714880 | elapsed time per iteration (s): 0.36 | learning rate: 2.624E-05 | global batch size: 256 | lm loss: 2.549172E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 704.583 | TFLOPs: 24.67 | +7: iteration 19020/ 21553 | consumed samples: 4869120 | consumed tokens: 9971957760 | elapsed time per iteration (s): 0.30 | learning rate: 2.619E-05 | global batch size: 256 | lm loss: 2.541045E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.328 | TFLOPs: 29.49 | +7: iteration 19030/ 21553 | consumed samples: 4871680 | consumed tokens: 9977200640 | elapsed time per iteration (s): 0.30 | learning rate: 2.614E-05 | global batch size: 256 | lm loss: 2.506829E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.392 | TFLOPs: 29.73 | +7: iteration 19040/ 21553 | consumed samples: 4874240 | consumed tokens: 9982443520 | elapsed time per iteration (s): 0.30 | learning rate: 2.609E-05 | global batch size: 256 | lm loss: 2.544214E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.706 | TFLOPs: 30.03 | +7: iteration 19050/ 21553 | consumed samples: 4876800 | consumed tokens: 9987686400 | elapsed time per iteration (s): 0.30 | learning rate: 2.604E-05 | global batch size: 256 | lm loss: 2.509017E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.001 | TFLOPs: 29.69 | +7: iteration 19060/ 21553 | consumed samples: 4879360 | consumed tokens: 9992929280 | elapsed time per iteration (s): 0.30 | learning rate: 2.600E-05 | global batch size: 256 | lm loss: 2.511358E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.117 | TFLOPs: 29.52 | +7: iteration 19070/ 21553 | consumed samples: 4881920 | consumed tokens: 9998172160 | elapsed time per iteration (s): 0.30 | learning rate: 2.595E-05 | global batch size: 256 | lm loss: 2.499258E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.440 | TFLOPs: 29.91 | +7: iteration 19080/ 21553 | consumed samples: 4884480 | consumed tokens: 10003415040 | elapsed time per iteration (s): 0.31 | learning rate: 2.590E-05 | global batch size: 256 | lm loss: 2.531711E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.126 | TFLOPs: 29.38 | +7: iteration 19090/ 21553 | consumed samples: 4887040 | consumed tokens: 10008657920 | elapsed time per iteration (s): 0.31 | learning rate: 2.585E-05 | global batch size: 256 | lm loss: 2.548909E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.220 | TFLOPs: 29.34 | +7: iteration 19100/ 21553 | consumed samples: 4889600 | consumed tokens: 10013900800 | elapsed time per iteration (s): 0.30 | learning rate: 2.581E-05 | global batch size: 256 | lm loss: 2.553656E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.452 | TFLOPs: 29.49 | +7: iteration 19110/ 21553 | consumed samples: 4892160 | consumed tokens: 10019143680 | elapsed time per iteration (s): 0.30 | learning rate: 2.576E-05 | global batch size: 256 | lm loss: 2.554619E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.183 | TFLOPs: 29.59 | +7: iteration 19120/ 21553 | consumed samples: 4894720 | consumed tokens: 10024386560 | elapsed time per iteration (s): 0.31 | learning rate: 2.571E-05 | global batch size: 256 | lm loss: 2.517104E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.385 | TFLOPs: 28.79 | +7: iteration 19130/ 21553 | consumed samples: 4897280 | consumed tokens: 10029629440 | elapsed time per iteration (s): 0.32 | learning rate: 2.567E-05 | global batch size: 256 | lm loss: 2.511297E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 797.008 | TFLOPs: 27.90 | +7: iteration 19140/ 21553 | consumed samples: 4899840 | consumed tokens: 10034872320 | elapsed time per iteration (s): 0.31 | learning rate: 2.562E-05 | global batch size: 256 | lm loss: 2.515223E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.310 | TFLOPs: 29.38 | +7: iteration 19150/ 21553 | consumed samples: 4902400 | consumed tokens: 10040115200 | elapsed time per iteration (s): 0.32 | learning rate: 2.557E-05 | global batch size: 256 | lm loss: 2.507109E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.144 | TFLOPs: 28.29 | +7: iteration 19160/ 21553 | consumed samples: 4904960 | consumed tokens: 10045358080 | elapsed time per iteration (s): 0.30 | learning rate: 2.553E-05 | global batch size: 256 | lm loss: 2.533447E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.341 | TFLOPs: 29.73 | +7: iteration 19170/ 21553 | consumed samples: 4907520 | consumed tokens: 10050600960 | elapsed time per iteration (s): 0.30 | learning rate: 2.548E-05 | global batch size: 256 | lm loss: 2.510650E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.785 | TFLOPs: 29.43 | +7: iteration 19180/ 21553 | consumed samples: 4910080 | consumed tokens: 10055843840 | elapsed time per iteration (s): 0.30 | learning rate: 2.544E-05 | global batch size: 256 | lm loss: 2.529204E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.109 | TFLOPs: 29.76 | +7: iteration 19190/ 21553 | consumed samples: 4912640 | consumed tokens: 10061086720 | elapsed time per iteration (s): 0.30 | learning rate: 2.539E-05 | global batch size: 256 | lm loss: 2.530374E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.966 | TFLOPs: 29.82 | +7: iteration 19200/ 21553 | consumed samples: 4915200 | consumed tokens: 10066329600 | elapsed time per iteration (s): 0.31 | learning rate: 2.535E-05 | global batch size: 256 | lm loss: 2.530020E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.816 | TFLOPs: 29.36 | +7: iteration 19210/ 21553 | consumed samples: 4917760 | consumed tokens: 10071572480 | elapsed time per iteration (s): 0.31 | learning rate: 2.530E-05 | global batch size: 256 | lm loss: 2.521156E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.138 | TFLOPs: 29.20 | +7: iteration 19220/ 21553 | consumed samples: 4920320 | consumed tokens: 10076815360 | elapsed time per iteration (s): 0.31 | learning rate: 2.526E-05 | global batch size: 256 | lm loss: 2.524742E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.386 | TFLOPs: 28.58 | +7: iteration 19230/ 21553 | consumed samples: 4922880 | consumed tokens: 10082058240 | elapsed time per iteration (s): 0.31 | learning rate: 2.521E-05 | global batch size: 256 | lm loss: 2.524685E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.776 | TFLOPs: 29.36 | +7: iteration 19240/ 21553 | consumed samples: 4925440 | consumed tokens: 10087301120 | elapsed time per iteration (s): 0.31 | learning rate: 2.517E-05 | global batch size: 256 | lm loss: 2.527820E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.008 | TFLOPs: 29.30 | +7: iteration 19250/ 21553 | consumed samples: 4928000 | consumed tokens: 10092544000 | elapsed time per iteration (s): 0.31 | learning rate: 2.512E-05 | global batch size: 256 | lm loss: 2.554215E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.823 | TFLOPs: 29.08 | +7: iteration 19260/ 21553 | consumed samples: 4930560 | consumed tokens: 10097786880 | elapsed time per iteration (s): 0.31 | learning rate: 2.508E-05 | global batch size: 256 | lm loss: 2.514960E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.086 | TFLOPs: 29.37 | +7: iteration 19270/ 21553 | consumed samples: 4933120 | consumed tokens: 10103029760 | elapsed time per iteration (s): 0.30 | learning rate: 2.504E-05 | global batch size: 256 | lm loss: 2.495206E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.420 | TFLOPs: 29.42 | +7: iteration 19280/ 21553 | consumed samples: 4935680 | consumed tokens: 10108272640 | elapsed time per iteration (s): 0.31 | learning rate: 2.499E-05 | global batch size: 256 | lm loss: 2.515935E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.394 | TFLOPs: 28.54 | +7: iteration 19290/ 21553 | consumed samples: 4938240 | consumed tokens: 10113515520 | elapsed time per iteration (s): 0.31 | learning rate: 2.495E-05 | global batch size: 256 | lm loss: 2.533063E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.876 | TFLOPs: 29.37 | +7: iteration 19300/ 21553 | consumed samples: 4940800 | consumed tokens: 10118758400 | elapsed time per iteration (s): 0.30 | learning rate: 2.491E-05 | global batch size: 256 | lm loss: 2.519939E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.417 | TFLOPs: 29.46 | +7: iteration 19310/ 21553 | consumed samples: 4943360 | consumed tokens: 10124001280 | elapsed time per iteration (s): 0.31 | learning rate: 2.486E-05 | global batch size: 256 | lm loss: 2.513390E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.732 | TFLOPs: 29.08 | +7: iteration 19320/ 21553 | consumed samples: 4945920 | consumed tokens: 10129244160 | elapsed time per iteration (s): 0.31 | learning rate: 2.482E-05 | global batch size: 256 | lm loss: 2.501093E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.335 | TFLOPs: 29.03 | +7: iteration 19330/ 21553 | consumed samples: 4948480 | consumed tokens: 10134487040 | elapsed time per iteration (s): 0.31 | learning rate: 2.478E-05 | global batch size: 256 | lm loss: 2.496003E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.364 | TFLOPs: 29.28 | +7: iteration 19340/ 21553 | consumed samples: 4951040 | consumed tokens: 10139729920 | elapsed time per iteration (s): 0.31 | learning rate: 2.474E-05 | global batch size: 256 | lm loss: 2.525627E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.370 | TFLOPs: 29.07 | +7: iteration 19350/ 21553 | consumed samples: 4953600 | consumed tokens: 10144972800 | elapsed time per iteration (s): 0.31 | learning rate: 2.469E-05 | global batch size: 256 | lm loss: 2.516802E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.257 | TFLOPs: 29.10 | +7: iteration 19360/ 21553 | consumed samples: 4956160 | consumed tokens: 10150215680 | elapsed time per iteration (s): 0.32 | learning rate: 2.465E-05 | global batch size: 256 | lm loss: 2.512238E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 791.439 | TFLOPs: 27.71 | +7: iteration 19370/ 21553 | consumed samples: 4958720 | consumed tokens: 10155458560 | elapsed time per iteration (s): 0.31 | learning rate: 2.461E-05 | global batch size: 256 | lm loss: 2.513473E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.718 | TFLOPs: 28.49 | +7: iteration 19380/ 21553 | consumed samples: 4961280 | consumed tokens: 10160701440 | elapsed time per iteration (s): 0.31 | learning rate: 2.457E-05 | global batch size: 256 | lm loss: 2.493015E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.029 | TFLOPs: 28.53 | +7: iteration 19390/ 21553 | consumed samples: 4963840 | consumed tokens: 10165944320 | elapsed time per iteration (s): 0.30 | learning rate: 2.453E-05 | global batch size: 256 | lm loss: 2.522821E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.222 | TFLOPs: 29.76 | +7: iteration 19400/ 21553 | consumed samples: 4966400 | consumed tokens: 10171187200 | elapsed time per iteration (s): 0.30 | learning rate: 2.448E-05 | global batch size: 256 | lm loss: 2.523658E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.509 | TFLOPs: 29.60 | +7: iteration 19410/ 21553 | consumed samples: 4968960 | consumed tokens: 10176430080 | elapsed time per iteration (s): 0.31 | learning rate: 2.444E-05 | global batch size: 256 | lm loss: 2.527460E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.748 | TFLOPs: 29.36 | +7: iteration 19420/ 21553 | consumed samples: 4971520 | consumed tokens: 10181672960 | elapsed time per iteration (s): 0.30 | learning rate: 2.440E-05 | global batch size: 256 | lm loss: 2.532238E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.744 | TFLOPs: 29.71 | +7: iteration 19430/ 21553 | consumed samples: 4974080 | consumed tokens: 10186915840 | elapsed time per iteration (s): 0.31 | learning rate: 2.436E-05 | global batch size: 256 | lm loss: 2.546281E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.437 | TFLOPs: 29.25 | +7: iteration 19440/ 21553 | consumed samples: 4976640 | consumed tokens: 10192158720 | elapsed time per iteration (s): 0.30 | learning rate: 2.432E-05 | global batch size: 256 | lm loss: 2.526002E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.218 | TFLOPs: 29.48 | +7: iteration 19450/ 21553 | consumed samples: 4979200 | consumed tokens: 10197401600 | elapsed time per iteration (s): 0.30 | learning rate: 2.428E-05 | global batch size: 256 | lm loss: 2.539401E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.543 | TFLOPs: 29.50 | +7: iteration 19460/ 21553 | consumed samples: 4981760 | consumed tokens: 10202644480 | elapsed time per iteration (s): 0.31 | learning rate: 2.424E-05 | global batch size: 256 | lm loss: 2.511530E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.171 | TFLOPs: 28.96 | +7: iteration 19470/ 21553 | consumed samples: 4984320 | consumed tokens: 10207887360 | elapsed time per iteration (s): 0.30 | learning rate: 2.420E-05 | global batch size: 256 | lm loss: 2.498310E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.077 | TFLOPs: 29.90 | +7: iteration 19480/ 21553 | consumed samples: 4986880 | consumed tokens: 10213130240 | elapsed time per iteration (s): 0.31 | learning rate: 2.416E-05 | global batch size: 256 | lm loss: 2.504594E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.790 | TFLOPs: 29.33 | +7: iteration 19490/ 21553 | consumed samples: 4989440 | consumed tokens: 10218373120 | elapsed time per iteration (s): 0.30 | learning rate: 2.412E-05 | global batch size: 256 | lm loss: 2.525285E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.908 | TFLOPs: 29.51 | +7: iteration 19500/ 21553 | consumed samples: 4992000 | consumed tokens: 10223616000 | elapsed time per iteration (s): 0.31 | learning rate: 2.408E-05 | global batch size: 256 | lm loss: 2.507460E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.119 | TFLOPs: 28.89 | +7: iteration 19510/ 21553 | consumed samples: 4994560 | consumed tokens: 10228858880 | elapsed time per iteration (s): 0.32 | learning rate: 2.404E-05 | global batch size: 256 | lm loss: 2.536357E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.788 | TFLOPs: 28.07 | +7: iteration 19520/ 21553 | consumed samples: 4997120 | consumed tokens: 10234101760 | elapsed time per iteration (s): 0.31 | learning rate: 2.400E-05 | global batch size: 256 | lm loss: 2.534537E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.453 | TFLOPs: 29.25 | +7: iteration 19530/ 21553 | consumed samples: 4999680 | consumed tokens: 10239344640 | elapsed time per iteration (s): 0.30 | learning rate: 2.396E-05 | global batch size: 256 | lm loss: 2.502003E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.860 | TFLOPs: 29.47 | +7: iteration 19540/ 21553 | consumed samples: 5002240 | consumed tokens: 10244587520 | elapsed time per iteration (s): 0.30 | learning rate: 2.392E-05 | global batch size: 256 | lm loss: 2.527476E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.377 | TFLOPs: 29.77 | +7: iteration 19550/ 21553 | consumed samples: 5004800 | consumed tokens: 10249830400 | elapsed time per iteration (s): 0.30 | learning rate: 2.389E-05 | global batch size: 256 | lm loss: 2.549609E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.761 | TFLOPs: 29.54 | +7: iteration 19560/ 21553 | consumed samples: 5007360 | consumed tokens: 10255073280 | elapsed time per iteration (s): 0.30 | learning rate: 2.385E-05 | global batch size: 256 | lm loss: 2.512911E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.795 | TFLOPs: 29.89 | +7: iteration 19570/ 21553 | consumed samples: 5009920 | consumed tokens: 10260316160 | elapsed time per iteration (s): 0.30 | learning rate: 2.381E-05 | global batch size: 256 | lm loss: 2.501946E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.131 | TFLOPs: 29.41 | +7: iteration 19580/ 21553 | consumed samples: 5012480 | consumed tokens: 10265559040 | elapsed time per iteration (s): 0.31 | learning rate: 2.377E-05 | global batch size: 256 | lm loss: 2.528304E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.302 | TFLOPs: 29.10 | +7: iteration 19590/ 21553 | consumed samples: 5015040 | consumed tokens: 10270801920 | elapsed time per iteration (s): 0.30 | learning rate: 2.373E-05 | global batch size: 256 | lm loss: 2.496198E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.707 | TFLOPs: 29.47 | +7: iteration 19600/ 21553 | consumed samples: 5017600 | consumed tokens: 10276044800 | elapsed time per iteration (s): 0.30 | learning rate: 2.370E-05 | global batch size: 256 | lm loss: 2.492441E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.543 | TFLOPs: 29.50 | +7: iteration 19610/ 21553 | consumed samples: 5020160 | consumed tokens: 10281287680 | elapsed time per iteration (s): 0.31 | learning rate: 2.366E-05 | global batch size: 256 | lm loss: 2.526531E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.609 | TFLOPs: 28.62 | +7: iteration 19620/ 21553 | consumed samples: 5022720 | consumed tokens: 10286530560 | elapsed time per iteration (s): 0.31 | learning rate: 2.362E-05 | global batch size: 256 | lm loss: 2.535036E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.883 | TFLOPs: 28.56 | +7: iteration 19630/ 21553 | consumed samples: 5025280 | consumed tokens: 10291773440 | elapsed time per iteration (s): 0.30 | learning rate: 2.358E-05 | global batch size: 256 | lm loss: 2.535793E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.263 | TFLOPs: 29.98 | +7: iteration 19640/ 21553 | consumed samples: 5027840 | consumed tokens: 10297016320 | elapsed time per iteration (s): 0.31 | learning rate: 2.355E-05 | global batch size: 256 | lm loss: 2.545338E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.782 | TFLOPs: 29.29 | +7: iteration 19650/ 21553 | consumed samples: 5030400 | consumed tokens: 10302259200 | elapsed time per iteration (s): 0.30 | learning rate: 2.351E-05 | global batch size: 256 | lm loss: 2.490193E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.052 | TFLOPs: 29.65 | +7: iteration 19660/ 21553 | consumed samples: 5032960 | consumed tokens: 10307502080 | elapsed time per iteration (s): 0.31 | learning rate: 2.347E-05 | global batch size: 256 | lm loss: 2.523434E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.548 | TFLOPs: 29.32 | +7: iteration 19670/ 21553 | consumed samples: 5035520 | consumed tokens: 10312744960 | elapsed time per iteration (s): 0.32 | learning rate: 2.344E-05 | global batch size: 256 | lm loss: 2.505781E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.843 | TFLOPs: 28.28 | +7: iteration 19680/ 21553 | consumed samples: 5038080 | consumed tokens: 10317987840 | elapsed time per iteration (s): 0.31 | learning rate: 2.340E-05 | global batch size: 256 | lm loss: 2.496941E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.727 | TFLOPs: 28.98 | +7: iteration 19690/ 21553 | consumed samples: 5040640 | consumed tokens: 10323230720 | elapsed time per iteration (s): 0.31 | learning rate: 2.336E-05 | global batch size: 256 | lm loss: 2.507406E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.227 | TFLOPs: 29.24 | +7: iteration 19700/ 21553 | consumed samples: 5043200 | consumed tokens: 10328473600 | elapsed time per iteration (s): 0.30 | learning rate: 2.333E-05 | global batch size: 256 | lm loss: 2.512127E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.534 | TFLOPs: 29.46 | +7: iteration 19710/ 21553 | consumed samples: 5045760 | consumed tokens: 10333716480 | elapsed time per iteration (s): 0.30 | learning rate: 2.329E-05 | global batch size: 256 | lm loss: 2.505500E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.073 | TFLOPs: 29.55 | +7: iteration 19720/ 21553 | consumed samples: 5048320 | consumed tokens: 10338959360 | elapsed time per iteration (s): 0.30 | learning rate: 2.326E-05 | global batch size: 256 | lm loss: 2.493362E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.894 | TFLOPs: 29.51 | +7: iteration 19730/ 21553 | consumed samples: 5050880 | consumed tokens: 10344202240 | elapsed time per iteration (s): 0.30 | learning rate: 2.322E-05 | global batch size: 256 | lm loss: 2.513864E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.894 | TFLOPs: 29.89 | +7: iteration 19740/ 21553 | consumed samples: 5053440 | consumed tokens: 10349445120 | elapsed time per iteration (s): 0.30 | learning rate: 2.319E-05 | global batch size: 256 | lm loss: 2.545943E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.326 | TFLOPs: 29.77 | +7: iteration 19750/ 21553 | consumed samples: 5056000 | consumed tokens: 10354688000 | elapsed time per iteration (s): 0.30 | learning rate: 2.315E-05 | global batch size: 256 | lm loss: 2.520441E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.829 | TFLOPs: 29.72 | +7: iteration 19760/ 21553 | consumed samples: 5058560 | consumed tokens: 10359930880 | elapsed time per iteration (s): 0.30 | learning rate: 2.312E-05 | global batch size: 256 | lm loss: 2.512938E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.874 | TFLOPs: 29.72 | +7: iteration 19770/ 21553 | consumed samples: 5061120 | consumed tokens: 10365173760 | elapsed time per iteration (s): 0.30 | learning rate: 2.308E-05 | global batch size: 256 | lm loss: 2.520452E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.303 | TFLOPs: 29.73 | +7: iteration 19780/ 21553 | consumed samples: 5063680 | consumed tokens: 10370416640 | elapsed time per iteration (s): 0.30 | learning rate: 2.305E-05 | global batch size: 256 | lm loss: 2.528340E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.679 | TFLOPs: 29.71 | +7: iteration 19790/ 21553 | consumed samples: 5066240 | consumed tokens: 10375659520 | elapsed time per iteration (s): 0.31 | learning rate: 2.302E-05 | global batch size: 256 | lm loss: 2.519958E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.253 | TFLOPs: 28.96 | +7: iteration 19800/ 21553 | consumed samples: 5068800 | consumed tokens: 10380902400 | elapsed time per iteration (s): 0.30 | learning rate: 2.298E-05 | global batch size: 256 | lm loss: 2.490011E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.167 | TFLOPs: 29.48 | +7: iteration 19810/ 21553 | consumed samples: 5071360 | consumed tokens: 10386145280 | elapsed time per iteration (s): 0.31 | learning rate: 2.295E-05 | global batch size: 256 | lm loss: 2.524933E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.233 | TFLOPs: 29.10 | +7: iteration 19820/ 21553 | consumed samples: 5073920 | consumed tokens: 10391388160 | elapsed time per iteration (s): 0.30 | learning rate: 2.291E-05 | global batch size: 256 | lm loss: 2.519724E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.826 | TFLOPs: 29.86 | +7: iteration 19830/ 21553 | consumed samples: 5076480 | consumed tokens: 10396631040 | elapsed time per iteration (s): 0.30 | learning rate: 2.288E-05 | global batch size: 256 | lm loss: 2.491146E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.124 | TFLOPs: 29.62 | +7: iteration 19840/ 21553 | consumed samples: 5079040 | consumed tokens: 10401873920 | elapsed time per iteration (s): 0.31 | learning rate: 2.285E-05 | global batch size: 256 | lm loss: 2.496133E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.176 | TFLOPs: 28.96 | +7: iteration 19850/ 21553 | consumed samples: 5081600 | consumed tokens: 10407116800 | elapsed time per iteration (s): 0.33 | learning rate: 2.281E-05 | global batch size: 256 | lm loss: 2.539488E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 772.850 | TFLOPs: 27.06 | +7: iteration 19860/ 21553 | consumed samples: 5084160 | consumed tokens: 10412359680 | elapsed time per iteration (s): 0.32 | learning rate: 2.278E-05 | global batch size: 256 | lm loss: 2.535261E+00 | grad norm: 0.268 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.827 | TFLOPs: 28.00 | +7: iteration 19870/ 21553 | consumed samples: 5086720 | consumed tokens: 10417602560 | elapsed time per iteration (s): 0.31 | learning rate: 2.275E-05 | global batch size: 256 | lm loss: 2.543786E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.822 | TFLOPs: 29.33 | +7: iteration 19880/ 21553 | consumed samples: 5089280 | consumed tokens: 10422845440 | elapsed time per iteration (s): 0.30 | learning rate: 2.272E-05 | global batch size: 256 | lm loss: 2.520131E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.540 | TFLOPs: 29.95 | +7: iteration 19890/ 21553 | consumed samples: 5091840 | consumed tokens: 10428088320 | elapsed time per iteration (s): 0.31 | learning rate: 2.268E-05 | global batch size: 256 | lm loss: 2.519885E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.507 | TFLOPs: 28.97 | +7: iteration 19900/ 21553 | consumed samples: 5094400 | consumed tokens: 10433331200 | elapsed time per iteration (s): 0.31 | learning rate: 2.265E-05 | global batch size: 256 | lm loss: 2.518408E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.490 | TFLOPs: 29.32 | +7: iteration 19910/ 21553 | consumed samples: 5096960 | consumed tokens: 10438574080 | elapsed time per iteration (s): 0.30 | learning rate: 2.262E-05 | global batch size: 256 | lm loss: 2.538610E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.153 | TFLOPs: 29.76 | +7: iteration 19920/ 21553 | consumed samples: 5099520 | consumed tokens: 10443816960 | elapsed time per iteration (s): 0.31 | learning rate: 2.259E-05 | global batch size: 256 | lm loss: 2.548441E+00 | grad norm: 0.305 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.245 | TFLOPs: 28.64 | +7: iteration 19930/ 21553 | consumed samples: 5102080 | consumed tokens: 10449059840 | elapsed time per iteration (s): 0.30 | learning rate: 2.256E-05 | global batch size: 256 | lm loss: 2.484783E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.567 | TFLOPs: 29.64 | +7: iteration 19940/ 21553 | consumed samples: 5104640 | consumed tokens: 10454302720 | elapsed time per iteration (s): 0.30 | learning rate: 2.253E-05 | global batch size: 256 | lm loss: 2.546666E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.052 | TFLOPs: 29.48 | +7: iteration 19950/ 21553 | consumed samples: 5107200 | consumed tokens: 10459545600 | elapsed time per iteration (s): 0.30 | learning rate: 2.250E-05 | global batch size: 256 | lm loss: 2.494469E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.592 | TFLOPs: 30.06 | +7: iteration 19960/ 21553 | consumed samples: 5109760 | consumed tokens: 10464788480 | elapsed time per iteration (s): 0.30 | learning rate: 2.246E-05 | global batch size: 256 | lm loss: 2.492318E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.561 | TFLOPs: 29.39 | +7: iteration 19970/ 21553 | consumed samples: 5112320 | consumed tokens: 10470031360 | elapsed time per iteration (s): 0.31 | learning rate: 2.243E-05 | global batch size: 256 | lm loss: 2.533405E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 822.446 | TFLOPs: 28.79 | +7: iteration 19980/ 21553 | consumed samples: 5114880 | consumed tokens: 10475274240 | elapsed time per iteration (s): 0.31 | learning rate: 2.240E-05 | global batch size: 256 | lm loss: 2.537734E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.307 | TFLOPs: 29.31 | +7: iteration 19990/ 21553 | consumed samples: 5117440 | consumed tokens: 10480517120 | elapsed time per iteration (s): 0.31 | learning rate: 2.237E-05 | global batch size: 256 | lm loss: 2.531286E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.756 | TFLOPs: 29.05 | +0: [2022-11-25 19:14:57,916] [INFO] [logging.py:68:log_dist] [Rank 0] step=20000, skipped=0, lr=[2.234259142486404e-05, 2.234259142486404e-05, 2.234259142486404e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 20000/ 21553 | consumed samples: 5120000 | consumed tokens: 10485760000 | elapsed time per iteration (s): 0.31 | learning rate: 2.234E-05 | global batch size: 256 | lm loss: 2.550459E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.378 | TFLOPs: 29.21 | +0: steps: 20000 loss: 2.5142 iter time (s): 0.304 samples/sec: 842.482 +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 20000 | lm loss value: 2.402645E+00 | lm loss PPL: 1.105237E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 20000 to checkpoints_146m +0: [2022-11-25 19:14:58,040] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step20000 is begin to save! +0: [2022-11-25 19:14:58,044] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_01-model_00-model_states.pt... +0: [2022-11-25 19:14:58,139] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_01-model_00-model_states.pt. +0: [2022-11-25 19:14:58,139] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_03-model_00-model_states.pt... +0: [2022-11-25 19:14:58,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_03-model_00-model_states.pt. +0: [2022-11-25 19:14:58,154] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_04-model_00-model_states.pt... +0: [2022-11-25 19:14:58,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_04-model_00-model_states.pt. +0: [2022-11-25 19:14:58,169] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_05-model_00-model_states.pt... +0: [2022-11-25 19:14:58,184] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_05-model_00-model_states.pt. +0: [2022-11-25 19:14:58,185] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_06-model_00-model_states.pt... +0: [2022-11-25 19:14:58,199] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_06-model_00-model_states.pt. +0: [2022-11-25 19:14:58,200] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_07-model_00-model_states.pt... +0: [2022-11-25 19:14:58,215] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_07-model_00-model_states.pt. +0: [2022-11-25 19:14:58,215] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_08-model_00-model_states.pt... +0: [2022-11-25 19:14:58,230] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_08-model_00-model_states.pt. +0: [2022-11-25 19:14:58,231] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_09-model_00-model_states.pt... +0: [2022-11-25 19:14:58,245] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_09-model_00-model_states.pt. +0: [2022-11-25 19:14:58,246] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_10-model_00-model_states.pt... +0: [2022-11-25 19:14:58,260] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_10-model_00-model_states.pt. +0: [2022-11-25 19:14:58,261] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_11-model_00-model_states.pt... +0: [2022-11-25 19:14:58,275] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_11-model_00-model_states.pt. +0: [2022-11-25 19:14:58,276] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_12-model_00-model_states.pt... +0: [2022-11-25 19:14:58,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_12-model_00-model_states.pt. +0: [2022-11-25 19:14:58,291] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_13-model_00-model_states.pt... +0: [2022-11-25 19:14:58,306] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_13-model_00-model_states.pt. +0: [2022-11-25 19:14:58,306] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_14-model_00-model_states.pt... +0: [2022-11-25 19:14:58,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_14-model_00-model_states.pt. +0: [2022-11-25 19:14:58,321] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_15-model_00-model_states.pt... +0: [2022-11-25 19:14:58,336] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_15-model_00-model_states.pt. +0: [2022-11-25 19:14:58,336] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_16-model_00-model_states.pt... +0: [2022-11-25 19:14:58,351] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_16-model_00-model_states.pt. +0: [2022-11-25 19:14:58,351] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_17-model_00-model_states.pt... +0: [2022-11-25 19:14:58,366] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_17-model_00-model_states.pt. +0: [2022-11-25 19:14:58,366] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/layer_19-model_00-model_states.pt... +0: [2022-11-25 19:14:58,367] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/layer_19-model_00-model_states.pt. +0: [2022-11-25 19:14:58,368] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step20000/mp_rank_00_model_states.pt +0: [2022-11-25 19:14:58,368] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/mp_rank_00_model_states.pt... +0: [2022-11-25 19:14:58,370] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/mp_rank_00_model_states.pt. +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:14:58,388] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:14:58,421] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,421] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,421] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,423] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,425] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,425] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,425] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,422] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,424] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,424] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,422] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,422] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,423] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,423] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,423] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,425] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,425] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2022-11-25 19:14:58,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 19:14:58,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2022-11-25 19:14:58,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:14:58,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:14:58,434] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,434] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 19:14:58,434] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2022-11-25 19:14:58,434] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +4: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2022-11-25 19:14:58,436] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:14:58,436] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 19:14:58,436] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,440] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,440] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,440] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2022-11-25 19:14:58,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,444] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:14:58,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,444] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,444] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2022-11-25 19:14:58,444] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:14:58,445] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 19:14:58,445] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2022-11-25 19:14:58,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 19:14:58,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: successfully saved checkpoint at iteration 20000 to checkpoints_146m +7: time (ms) | save-checkpoint: 421.20 +7: iteration 20010/ 21553 | consumed samples: 5122560 | consumed tokens: 10491002880 | elapsed time per iteration (s): 0.35 | learning rate: 2.231E-05 | global batch size: 256 | lm loss: 2.523115E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 722.767 | TFLOPs: 25.30 | +7: iteration 20020/ 21553 | consumed samples: 5125120 | consumed tokens: 10496245760 | elapsed time per iteration (s): 0.31 | learning rate: 2.228E-05 | global batch size: 256 | lm loss: 2.497172E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.780 | TFLOPs: 29.15 | +7: iteration 20030/ 21553 | consumed samples: 5127680 | consumed tokens: 10501488640 | elapsed time per iteration (s): 0.30 | learning rate: 2.225E-05 | global batch size: 256 | lm loss: 2.526027E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.860 | TFLOPs: 29.61 | +7: iteration 20040/ 21553 | consumed samples: 5130240 | consumed tokens: 10506731520 | elapsed time per iteration (s): 0.30 | learning rate: 2.222E-05 | global batch size: 256 | lm loss: 2.530202E+00 | grad norm: 0.266 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.251 | TFLOPs: 29.69 | +7: iteration 20050/ 21553 | consumed samples: 5132800 | consumed tokens: 10511974400 | elapsed time per iteration (s): 0.31 | learning rate: 2.219E-05 | global batch size: 256 | lm loss: 2.505853E+00 | grad norm: 0.265 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.508 | TFLOPs: 29.21 | +7: iteration 20060/ 21553 | consumed samples: 5135360 | consumed tokens: 10517217280 | elapsed time per iteration (s): 0.30 | learning rate: 2.217E-05 | global batch size: 256 | lm loss: 2.523782E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.198 | TFLOPs: 30.04 | +7: iteration 20070/ 21553 | consumed samples: 5137920 | consumed tokens: 10522460160 | elapsed time per iteration (s): 0.31 | learning rate: 2.214E-05 | global batch size: 256 | lm loss: 2.488841E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.026 | TFLOPs: 29.23 | +7: iteration 20080/ 21553 | consumed samples: 5140480 | consumed tokens: 10527703040 | elapsed time per iteration (s): 0.31 | learning rate: 2.211E-05 | global batch size: 256 | lm loss: 2.503445E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.738 | TFLOPs: 29.19 | +7: iteration 20090/ 21553 | consumed samples: 5143040 | consumed tokens: 10532945920 | elapsed time per iteration (s): 0.30 | learning rate: 2.208E-05 | global batch size: 256 | lm loss: 2.472672E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.790 | TFLOPs: 29.82 | +7: iteration 20100/ 21553 | consumed samples: 5145600 | consumed tokens: 10538188800 | elapsed time per iteration (s): 0.30 | learning rate: 2.205E-05 | global batch size: 256 | lm loss: 2.536821E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.397 | TFLOPs: 29.77 | +7: iteration 20110/ 21553 | consumed samples: 5148160 | consumed tokens: 10543431680 | elapsed time per iteration (s): 0.31 | learning rate: 2.202E-05 | global batch size: 256 | lm loss: 2.494213E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.835 | TFLOPs: 29.33 | +7: iteration 20120/ 21553 | consumed samples: 5150720 | consumed tokens: 10548674560 | elapsed time per iteration (s): 0.30 | learning rate: 2.200E-05 | global batch size: 256 | lm loss: 2.504790E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.031 | TFLOPs: 29.58 | +7: iteration 20130/ 21553 | consumed samples: 5153280 | consumed tokens: 10553917440 | elapsed time per iteration (s): 0.30 | learning rate: 2.197E-05 | global batch size: 256 | lm loss: 2.517706E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.230 | TFLOPs: 30.04 | +7: iteration 20140/ 21553 | consumed samples: 5155840 | consumed tokens: 10559160320 | elapsed time per iteration (s): 0.30 | learning rate: 2.194E-05 | global batch size: 256 | lm loss: 2.508090E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.041 | TFLOPs: 29.44 | +7: iteration 20150/ 21553 | consumed samples: 5158400 | consumed tokens: 10564403200 | elapsed time per iteration (s): 0.30 | learning rate: 2.191E-05 | global batch size: 256 | lm loss: 2.524609E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.020 | TFLOPs: 29.72 | +7: iteration 20160/ 21553 | consumed samples: 5160960 | consumed tokens: 10569646080 | elapsed time per iteration (s): 0.31 | learning rate: 2.189E-05 | global batch size: 256 | lm loss: 2.559445E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.795 | TFLOPs: 28.49 | +7: iteration 20170/ 21553 | consumed samples: 5163520 | consumed tokens: 10574888960 | elapsed time per iteration (s): 0.31 | learning rate: 2.186E-05 | global batch size: 256 | lm loss: 2.510787E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.868 | TFLOPs: 29.09 | +7: iteration 20180/ 21553 | consumed samples: 5166080 | consumed tokens: 10580131840 | elapsed time per iteration (s): 0.30 | learning rate: 2.183E-05 | global batch size: 256 | lm loss: 2.519798E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.366 | TFLOPs: 29.98 | +7: iteration 20190/ 21553 | consumed samples: 5168640 | consumed tokens: 10585374720 | elapsed time per iteration (s): 0.30 | learning rate: 2.181E-05 | global batch size: 256 | lm loss: 2.515867E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.773 | TFLOPs: 29.92 | +7: iteration 20200/ 21553 | consumed samples: 5171200 | consumed tokens: 10590617600 | elapsed time per iteration (s): 0.30 | learning rate: 2.178E-05 | global batch size: 256 | lm loss: 2.526282E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.076 | TFLOPs: 29.79 | +7: iteration 20210/ 21553 | consumed samples: 5173760 | consumed tokens: 10595860480 | elapsed time per iteration (s): 0.31 | learning rate: 2.175E-05 | global batch size: 256 | lm loss: 2.472913E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.651 | TFLOPs: 28.76 | +7: iteration 20220/ 21553 | consumed samples: 5176320 | consumed tokens: 10601103360 | elapsed time per iteration (s): 0.30 | learning rate: 2.173E-05 | global batch size: 256 | lm loss: 2.494554E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.182 | TFLOPs: 29.87 | +7: iteration 20230/ 21553 | consumed samples: 5178880 | consumed tokens: 10606346240 | elapsed time per iteration (s): 0.31 | learning rate: 2.170E-05 | global batch size: 256 | lm loss: 2.516012E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.124 | TFLOPs: 28.89 | +7: iteration 20240/ 21553 | consumed samples: 5181440 | consumed tokens: 10611589120 | elapsed time per iteration (s): 0.31 | learning rate: 2.168E-05 | global batch size: 256 | lm loss: 2.513171E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.640 | TFLOPs: 29.29 | +7: iteration 20250/ 21553 | consumed samples: 5184000 | consumed tokens: 10616832000 | elapsed time per iteration (s): 0.30 | learning rate: 2.165E-05 | global batch size: 256 | lm loss: 2.503572E+00 | grad norm: 0.297 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.566 | TFLOPs: 29.85 | +7: iteration 20260/ 21553 | consumed samples: 5186560 | consumed tokens: 10622074880 | elapsed time per iteration (s): 0.30 | learning rate: 2.163E-05 | global batch size: 256 | lm loss: 2.505381E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.558 | TFLOPs: 29.50 | +7: iteration 20270/ 21553 | consumed samples: 5189120 | consumed tokens: 10627317760 | elapsed time per iteration (s): 0.30 | learning rate: 2.160E-05 | global batch size: 256 | lm loss: 2.513100E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.517 | TFLOPs: 29.91 | +7: iteration 20280/ 21553 | consumed samples: 5191680 | consumed tokens: 10632560640 | elapsed time per iteration (s): 0.31 | learning rate: 2.158E-05 | global batch size: 256 | lm loss: 2.520186E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.371 | TFLOPs: 28.93 | +7: iteration 20290/ 21553 | consumed samples: 5194240 | consumed tokens: 10637803520 | elapsed time per iteration (s): 0.30 | learning rate: 2.155E-05 | global batch size: 256 | lm loss: 2.553865E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.442 | TFLOPs: 29.74 | +7: iteration 20300/ 21553 | consumed samples: 5196800 | consumed tokens: 10643046400 | elapsed time per iteration (s): 0.31 | learning rate: 2.153E-05 | global batch size: 256 | lm loss: 2.498795E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 815.592 | TFLOPs: 28.55 | +7: iteration 20310/ 21553 | consumed samples: 5199360 | consumed tokens: 10648289280 | elapsed time per iteration (s): 0.30 | learning rate: 2.150E-05 | global batch size: 256 | lm loss: 2.503430E+00 | grad norm: 0.269 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.883 | TFLOPs: 29.47 | +7: iteration 20320/ 21553 | consumed samples: 5201920 | consumed tokens: 10653532160 | elapsed time per iteration (s): 0.30 | learning rate: 2.148E-05 | global batch size: 256 | lm loss: 2.519548E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.251 | TFLOPs: 29.80 | +7: iteration 20330/ 21553 | consumed samples: 5204480 | consumed tokens: 10658775040 | elapsed time per iteration (s): 0.31 | learning rate: 2.146E-05 | global batch size: 256 | lm loss: 2.537739E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.055 | TFLOPs: 28.57 | +7: iteration 20340/ 21553 | consumed samples: 5207040 | consumed tokens: 10664017920 | elapsed time per iteration (s): 0.30 | learning rate: 2.143E-05 | global batch size: 256 | lm loss: 2.507456E+00 | grad norm: 0.260 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.769 | TFLOPs: 29.64 | +7: iteration 20350/ 21553 | consumed samples: 5209600 | consumed tokens: 10669260800 | elapsed time per iteration (s): 0.32 | learning rate: 2.141E-05 | global batch size: 256 | lm loss: 2.470770E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.202 | TFLOPs: 28.26 | +7: iteration 20360/ 21553 | consumed samples: 5212160 | consumed tokens: 10674503680 | elapsed time per iteration (s): 0.30 | learning rate: 2.138E-05 | global batch size: 256 | lm loss: 2.522427E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.335 | TFLOPs: 29.42 | +7: iteration 20370/ 21553 | consumed samples: 5214720 | consumed tokens: 10679746560 | elapsed time per iteration (s): 0.30 | learning rate: 2.136E-05 | global batch size: 256 | lm loss: 2.494706E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.000 | TFLOPs: 29.83 | +7: iteration 20380/ 21553 | consumed samples: 5217280 | consumed tokens: 10684989440 | elapsed time per iteration (s): 0.30 | learning rate: 2.134E-05 | global batch size: 256 | lm loss: 2.521528E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.533 | TFLOPs: 29.81 | +7: iteration 20390/ 21553 | consumed samples: 5219840 | consumed tokens: 10690232320 | elapsed time per iteration (s): 0.30 | learning rate: 2.132E-05 | global batch size: 256 | lm loss: 2.492818E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.125 | TFLOPs: 29.76 | +7: iteration 20400/ 21553 | consumed samples: 5222400 | consumed tokens: 10695475200 | elapsed time per iteration (s): 0.30 | learning rate: 2.129E-05 | global batch size: 256 | lm loss: 2.542459E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.048 | TFLOPs: 30.00 | +7: iteration 20410/ 21553 | consumed samples: 5224960 | consumed tokens: 10700718080 | elapsed time per iteration (s): 0.31 | learning rate: 2.127E-05 | global batch size: 256 | lm loss: 2.499960E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.605 | TFLOPs: 28.76 | +7: iteration 20420/ 21553 | consumed samples: 5227520 | consumed tokens: 10705960960 | elapsed time per iteration (s): 0.30 | learning rate: 2.125E-05 | global batch size: 256 | lm loss: 2.531130E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.942 | TFLOPs: 29.75 | +7: iteration 20430/ 21553 | consumed samples: 5230080 | consumed tokens: 10711203840 | elapsed time per iteration (s): 0.30 | learning rate: 2.123E-05 | global batch size: 256 | lm loss: 2.521899E+00 | grad norm: 0.293 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.889 | TFLOPs: 29.89 | +7: iteration 20440/ 21553 | consumed samples: 5232640 | consumed tokens: 10716446720 | elapsed time per iteration (s): 0.31 | learning rate: 2.121E-05 | global batch size: 256 | lm loss: 2.521170E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.384 | TFLOPs: 29.24 | +7: iteration 20450/ 21553 | consumed samples: 5235200 | consumed tokens: 10721689600 | elapsed time per iteration (s): 0.31 | learning rate: 2.118E-05 | global batch size: 256 | lm loss: 2.510018E+00 | grad norm: 0.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.272 | TFLOPs: 29.07 | +7: iteration 20460/ 21553 | consumed samples: 5237760 | consumed tokens: 10726932480 | elapsed time per iteration (s): 0.30 | learning rate: 2.116E-05 | global batch size: 256 | lm loss: 2.510375E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.373 | TFLOPs: 29.59 | +7: iteration 20470/ 21553 | consumed samples: 5240320 | consumed tokens: 10732175360 | elapsed time per iteration (s): 0.31 | learning rate: 2.114E-05 | global batch size: 256 | lm loss: 2.513503E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.289 | TFLOPs: 29.03 | +7: iteration 20480/ 21553 | consumed samples: 5242880 | consumed tokens: 10737418240 | elapsed time per iteration (s): 0.31 | learning rate: 2.112E-05 | global batch size: 256 | lm loss: 2.522111E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.824 | TFLOPs: 29.05 | +7: iteration 20490/ 21553 | consumed samples: 5245440 | consumed tokens: 10742661120 | elapsed time per iteration (s): 0.30 | learning rate: 2.110E-05 | global batch size: 256 | lm loss: 2.518353E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.050 | TFLOPs: 29.69 | +7: iteration 20500/ 21553 | consumed samples: 5248000 | consumed tokens: 10747904000 | elapsed time per iteration (s): 0.30 | learning rate: 2.108E-05 | global batch size: 256 | lm loss: 2.526006E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.283 | TFLOPs: 29.52 | +7: iteration 20510/ 21553 | consumed samples: 5250560 | consumed tokens: 10753146880 | elapsed time per iteration (s): 0.31 | learning rate: 2.106E-05 | global batch size: 256 | lm loss: 2.513317E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 823.965 | TFLOPs: 28.84 | +7: iteration 20520/ 21553 | consumed samples: 5253120 | consumed tokens: 10758389760 | elapsed time per iteration (s): 0.32 | learning rate: 2.104E-05 | global batch size: 256 | lm loss: 2.566718E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 811.468 | TFLOPs: 28.41 | +7: iteration 20530/ 21553 | consumed samples: 5255680 | consumed tokens: 10763632640 | elapsed time per iteration (s): 0.30 | learning rate: 2.102E-05 | global batch size: 256 | lm loss: 2.529466E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.937 | TFLOPs: 29.86 | +7: iteration 20540/ 21553 | consumed samples: 5258240 | consumed tokens: 10768875520 | elapsed time per iteration (s): 0.30 | learning rate: 2.100E-05 | global batch size: 256 | lm loss: 2.499536E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.464 | TFLOPs: 29.56 | +7: iteration 20550/ 21553 | consumed samples: 5260800 | consumed tokens: 10774118400 | elapsed time per iteration (s): 0.30 | learning rate: 2.098E-05 | global batch size: 256 | lm loss: 2.530056E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.211 | TFLOPs: 29.83 | +7: iteration 20560/ 21553 | consumed samples: 5263360 | consumed tokens: 10779361280 | elapsed time per iteration (s): 0.31 | learning rate: 2.096E-05 | global batch size: 256 | lm loss: 2.516222E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.512 | TFLOPs: 28.48 | +7: iteration 20570/ 21553 | consumed samples: 5265920 | consumed tokens: 10784604160 | elapsed time per iteration (s): 0.31 | learning rate: 2.094E-05 | global batch size: 256 | lm loss: 2.505696E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.756 | TFLOPs: 29.05 | +7: iteration 20580/ 21553 | consumed samples: 5268480 | consumed tokens: 10789847040 | elapsed time per iteration (s): 0.30 | learning rate: 2.092E-05 | global batch size: 256 | lm loss: 2.484843E+00 | grad norm: 0.272 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.193 | TFLOPs: 29.55 | +7: iteration 20590/ 21553 | consumed samples: 5271040 | consumed tokens: 10795089920 | elapsed time per iteration (s): 0.31 | learning rate: 2.090E-05 | global batch size: 256 | lm loss: 2.520329E+00 | grad norm: 0.301 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.482 | TFLOPs: 29.35 | +7: iteration 20600/ 21553 | consumed samples: 5273600 | consumed tokens: 10800332800 | elapsed time per iteration (s): 0.31 | learning rate: 2.088E-05 | global batch size: 256 | lm loss: 2.493446E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.306 | TFLOPs: 29.17 | +7: iteration 20610/ 21553 | consumed samples: 5276160 | consumed tokens: 10805575680 | elapsed time per iteration (s): 0.32 | learning rate: 2.087E-05 | global batch size: 256 | lm loss: 2.501813E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 801.852 | TFLOPs: 28.07 | +7: iteration 20620/ 21553 | consumed samples: 5278720 | consumed tokens: 10810818560 | elapsed time per iteration (s): 0.31 | learning rate: 2.085E-05 | global batch size: 256 | lm loss: 2.491986E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.300 | TFLOPs: 28.96 | +7: iteration 20630/ 21553 | consumed samples: 5281280 | consumed tokens: 10816061440 | elapsed time per iteration (s): 0.31 | learning rate: 2.083E-05 | global batch size: 256 | lm loss: 2.543856E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.058 | TFLOPs: 29.20 | +7: iteration 20640/ 21553 | consumed samples: 5283840 | consumed tokens: 10821304320 | elapsed time per iteration (s): 0.31 | learning rate: 2.081E-05 | global batch size: 256 | lm loss: 2.523484E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.636 | TFLOPs: 29.15 | +7: iteration 20650/ 21553 | consumed samples: 5286400 | consumed tokens: 10826547200 | elapsed time per iteration (s): 0.31 | learning rate: 2.079E-05 | global batch size: 256 | lm loss: 2.515150E+00 | grad norm: 0.263 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.990 | TFLOPs: 29.20 | +7: iteration 20660/ 21553 | consumed samples: 5288960 | consumed tokens: 10831790080 | elapsed time per iteration (s): 0.30 | learning rate: 2.078E-05 | global batch size: 256 | lm loss: 2.544303E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.077 | TFLOPs: 29.41 | +7: iteration 20670/ 21553 | consumed samples: 5291520 | consumed tokens: 10837032960 | elapsed time per iteration (s): 0.31 | learning rate: 2.076E-05 | global batch size: 256 | lm loss: 2.519405E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.956 | TFLOPs: 29.19 | +7: iteration 20680/ 21553 | consumed samples: 5294080 | consumed tokens: 10842275840 | elapsed time per iteration (s): 0.30 | learning rate: 2.074E-05 | global batch size: 256 | lm loss: 2.499067E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.437 | TFLOPs: 29.74 | +7: iteration 20690/ 21553 | consumed samples: 5296640 | consumed tokens: 10847518720 | elapsed time per iteration (s): 0.31 | learning rate: 2.073E-05 | global batch size: 256 | lm loss: 2.511831E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.627 | TFLOPs: 29.36 | +7: iteration 20700/ 21553 | consumed samples: 5299200 | consumed tokens: 10852761600 | elapsed time per iteration (s): 0.31 | learning rate: 2.071E-05 | global batch size: 256 | lm loss: 2.523054E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.619 | TFLOPs: 28.48 | +7: iteration 20710/ 21553 | consumed samples: 5301760 | consumed tokens: 10858004480 | elapsed time per iteration (s): 0.31 | learning rate: 2.069E-05 | global batch size: 256 | lm loss: 2.492922E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.925 | TFLOPs: 29.23 | +7: iteration 20720/ 21553 | consumed samples: 5304320 | consumed tokens: 10863247360 | elapsed time per iteration (s): 0.30 | learning rate: 2.068E-05 | global batch size: 256 | lm loss: 2.501687E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.630 | TFLOPs: 29.46 | +7: iteration 20730/ 21553 | consumed samples: 5306880 | consumed tokens: 10868490240 | elapsed time per iteration (s): 0.31 | learning rate: 2.066E-05 | global batch size: 256 | lm loss: 2.516529E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.049 | TFLOPs: 28.99 | +7: iteration 20740/ 21553 | consumed samples: 5309440 | consumed tokens: 10873733120 | elapsed time per iteration (s): 0.30 | learning rate: 2.064E-05 | global batch size: 256 | lm loss: 2.503337E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.136 | TFLOPs: 29.55 | +7: iteration 20750/ 21553 | consumed samples: 5312000 | consumed tokens: 10878976000 | elapsed time per iteration (s): 0.30 | learning rate: 2.063E-05 | global batch size: 256 | lm loss: 2.520445E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.238 | TFLOPs: 29.52 | +7: iteration 20760/ 21553 | consumed samples: 5314560 | consumed tokens: 10884218880 | elapsed time per iteration (s): 0.31 | learning rate: 2.061E-05 | global batch size: 256 | lm loss: 2.523934E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.985 | TFLOPs: 28.71 | +7: iteration 20770/ 21553 | consumed samples: 5317120 | consumed tokens: 10889461760 | elapsed time per iteration (s): 0.33 | learning rate: 2.060E-05 | global batch size: 256 | lm loss: 2.516638E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 783.459 | TFLOPs: 27.43 | +7: iteration 20780/ 21553 | consumed samples: 5319680 | consumed tokens: 10894704640 | elapsed time per iteration (s): 0.31 | learning rate: 2.058E-05 | global batch size: 256 | lm loss: 2.514867E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 816.373 | TFLOPs: 28.58 | +7: iteration 20790/ 21553 | consumed samples: 5322240 | consumed tokens: 10899947520 | elapsed time per iteration (s): 0.31 | learning rate: 2.057E-05 | global batch size: 256 | lm loss: 2.522353E+00 | grad norm: 0.267 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.422 | TFLOPs: 28.86 | +7: iteration 20800/ 21553 | consumed samples: 5324800 | consumed tokens: 10905190400 | elapsed time per iteration (s): 0.31 | learning rate: 2.055E-05 | global batch size: 256 | lm loss: 2.543201E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.449 | TFLOPs: 28.76 | +7: iteration 20810/ 21553 | consumed samples: 5327360 | consumed tokens: 10910433280 | elapsed time per iteration (s): 0.31 | learning rate: 2.054E-05 | global batch size: 256 | lm loss: 2.514869E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.216 | TFLOPs: 28.99 | +7: iteration 20820/ 21553 | consumed samples: 5329920 | consumed tokens: 10915676160 | elapsed time per iteration (s): 0.31 | learning rate: 2.052E-05 | global batch size: 256 | lm loss: 2.517473E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.969 | TFLOPs: 28.49 | +7: iteration 20830/ 21553 | consumed samples: 5332480 | consumed tokens: 10920919040 | elapsed time per iteration (s): 0.30 | learning rate: 2.051E-05 | global batch size: 256 | lm loss: 2.478801E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.243 | TFLOPs: 29.97 | +7: iteration 20840/ 21553 | consumed samples: 5335040 | consumed tokens: 10926161920 | elapsed time per iteration (s): 0.32 | learning rate: 2.050E-05 | global batch size: 256 | lm loss: 2.508255E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 791.958 | TFLOPs: 27.72 | +7: iteration 20850/ 21553 | consumed samples: 5337600 | consumed tokens: 10931404800 | elapsed time per iteration (s): 0.30 | learning rate: 2.048E-05 | global batch size: 256 | lm loss: 2.519702E+00 | grad norm: 0.288 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.433 | TFLOPs: 29.74 | +7: iteration 20860/ 21553 | consumed samples: 5340160 | consumed tokens: 10936647680 | elapsed time per iteration (s): 0.31 | learning rate: 2.047E-05 | global batch size: 256 | lm loss: 2.523100E+00 | grad norm: 0.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.652 | TFLOPs: 28.87 | +7: iteration 20870/ 21553 | consumed samples: 5342720 | consumed tokens: 10941890560 | elapsed time per iteration (s): 0.30 | learning rate: 2.045E-05 | global batch size: 256 | lm loss: 2.532711E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.194 | TFLOPs: 29.62 | +7: iteration 20880/ 21553 | consumed samples: 5345280 | consumed tokens: 10947133440 | elapsed time per iteration (s): 0.30 | learning rate: 2.044E-05 | global batch size: 256 | lm loss: 2.522735E+00 | grad norm: 0.276 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.129 | TFLOPs: 29.52 | +7: iteration 20890/ 21553 | consumed samples: 5347840 | consumed tokens: 10952376320 | elapsed time per iteration (s): 0.30 | learning rate: 2.043E-05 | global batch size: 256 | lm loss: 2.503029E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.328 | TFLOPs: 29.98 | +7: iteration 20900/ 21553 | consumed samples: 5350400 | consumed tokens: 10957619200 | elapsed time per iteration (s): 0.30 | learning rate: 2.042E-05 | global batch size: 256 | lm loss: 2.503515E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.074 | TFLOPs: 29.48 | +7: iteration 20910/ 21553 | consumed samples: 5352960 | consumed tokens: 10962862080 | elapsed time per iteration (s): 0.30 | learning rate: 2.040E-05 | global batch size: 256 | lm loss: 2.500513E+00 | grad norm: 0.303 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.631 | TFLOPs: 29.46 | +7: iteration 20920/ 21553 | consumed samples: 5355520 | consumed tokens: 10968104960 | elapsed time per iteration (s): 0.31 | learning rate: 2.039E-05 | global batch size: 256 | lm loss: 2.509859E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.626 | TFLOPs: 29.29 | +7: iteration 20930/ 21553 | consumed samples: 5358080 | consumed tokens: 10973347840 | elapsed time per iteration (s): 0.31 | learning rate: 2.038E-05 | global batch size: 256 | lm loss: 2.513692E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.481 | TFLOPs: 28.62 | +7: iteration 20940/ 21553 | consumed samples: 5360640 | consumed tokens: 10978590720 | elapsed time per iteration (s): 0.31 | learning rate: 2.037E-05 | global batch size: 256 | lm loss: 2.484138E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.798 | TFLOPs: 29.36 | +7: iteration 20950/ 21553 | consumed samples: 5363200 | consumed tokens: 10983833600 | elapsed time per iteration (s): 0.30 | learning rate: 2.035E-05 | global batch size: 256 | lm loss: 2.503504E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.306 | TFLOPs: 29.45 | +7: iteration 20960/ 21553 | consumed samples: 5365760 | consumed tokens: 10989076480 | elapsed time per iteration (s): 0.31 | learning rate: 2.034E-05 | global batch size: 256 | lm loss: 2.508119E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.747 | TFLOPs: 29.08 | +7: iteration 20970/ 21553 | consumed samples: 5368320 | consumed tokens: 10994319360 | elapsed time per iteration (s): 0.31 | learning rate: 2.033E-05 | global batch size: 256 | lm loss: 2.501108E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.411 | TFLOPs: 29.25 | +7: iteration 20980/ 21553 | consumed samples: 5370880 | consumed tokens: 10999562240 | elapsed time per iteration (s): 0.30 | learning rate: 2.032E-05 | global batch size: 256 | lm loss: 2.488489E+00 | grad norm: 0.280 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.335 | TFLOPs: 29.42 | +7: iteration 20990/ 21553 | consumed samples: 5373440 | consumed tokens: 11004805120 | elapsed time per iteration (s): 0.30 | learning rate: 2.031E-05 | global batch size: 256 | lm loss: 2.516279E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.823 | TFLOPs: 29.43 | +7: iteration 21000/ 21553 | consumed samples: 5376000 | consumed tokens: 11010048000 | elapsed time per iteration (s): 0.31 | learning rate: 2.030E-05 | global batch size: 256 | lm loss: 2.522742E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.601 | TFLOPs: 29.08 | +7: ------------------------------------------------------------------------------------------- +7: valid loss at iteration 21000 | lm loss value: 2.456617E+00 | lm loss PPL: 1.166528E+01 | +7: ------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 21000 to checkpoints_146m +0: [2022-11-25 19:20:04,623] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step21000 is begin to save! +0: [2022-11-25 19:20:04,627] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_01-model_00-model_states.pt... +0: [2022-11-25 19:20:04,760] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_01-model_00-model_states.pt. +0: [2022-11-25 19:20:04,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_03-model_00-model_states.pt... +0: [2022-11-25 19:20:04,781] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_03-model_00-model_states.pt. +0: [2022-11-25 19:20:04,782] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_04-model_00-model_states.pt... +0: [2022-11-25 19:20:04,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_04-model_00-model_states.pt. +0: [2022-11-25 19:20:04,804] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_05-model_00-model_states.pt... +0: [2022-11-25 19:20:04,825] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_05-model_00-model_states.pt. +0: [2022-11-25 19:20:04,825] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_06-model_00-model_states.pt... +0: [2022-11-25 19:20:04,846] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_06-model_00-model_states.pt. +0: [2022-11-25 19:20:04,846] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_07-model_00-model_states.pt... +0: [2022-11-25 19:20:04,863] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_07-model_00-model_states.pt. +0: [2022-11-25 19:20:04,863] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_08-model_00-model_states.pt... +0: [2022-11-25 19:20:04,878] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_08-model_00-model_states.pt. +0: [2022-11-25 19:20:04,878] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_09-model_00-model_states.pt... +0: [2022-11-25 19:20:04,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_09-model_00-model_states.pt. +0: [2022-11-25 19:20:04,893] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_10-model_00-model_states.pt... +0: [2022-11-25 19:20:04,908] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_10-model_00-model_states.pt. +0: [2022-11-25 19:20:04,908] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_11-model_00-model_states.pt... +0: [2022-11-25 19:20:04,923] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_11-model_00-model_states.pt. +0: [2022-11-25 19:20:04,923] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_12-model_00-model_states.pt... +0: [2022-11-25 19:20:04,938] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_12-model_00-model_states.pt. +0: [2022-11-25 19:20:04,938] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_13-model_00-model_states.pt... +0: [2022-11-25 19:20:04,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_13-model_00-model_states.pt. +0: [2022-11-25 19:20:04,953] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_14-model_00-model_states.pt... +0: [2022-11-25 19:20:04,967] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_14-model_00-model_states.pt. +0: [2022-11-25 19:20:04,968] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_15-model_00-model_states.pt... +0: [2022-11-25 19:20:04,982] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_15-model_00-model_states.pt. +0: [2022-11-25 19:20:04,982] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_16-model_00-model_states.pt... +0: [2022-11-25 19:20:04,997] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_16-model_00-model_states.pt. +0: [2022-11-25 19:20:04,997] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_17-model_00-model_states.pt... +0: [2022-11-25 19:20:05,012] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_17-model_00-model_states.pt. +0: [2022-11-25 19:20:05,012] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/layer_19-model_00-model_states.pt... +0: [2022-11-25 19:20:05,013] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/layer_19-model_00-model_states.pt. +0: [2022-11-25 19:20:05,014] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step21000/mp_rank_00_model_states.pt +0: [2022-11-25 19:20:05,014] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/mp_rank_00_model_states.pt... +0: [2022-11-25 19:20:05,017] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/mp_rank_00_model_states.pt. +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,035] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:20:05,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,066] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,076] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,076] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,076] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:20:05,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2022-11-25 19:20:05,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,072] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,072] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,079] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,076] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +4: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,076] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2022-11-25 19:20:05,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2022-11-25 19:20:05,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:20:05,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +2: [2022-11-25 19:20:05,079] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,082] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,083] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:20:05,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2022-11-25 19:20:05,083] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:20:05,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 19:20:05,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2022-11-25 19:20:05,084] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:20:05,084] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 19:20:05,084] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2022-11-25 19:20:05,084] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:20:05,084] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2022-11-25 19:20:05,084] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2022-11-25 19:20:05,099] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 19:20:05,099] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: successfully saved checkpoint at iteration 21000 to checkpoints_146m +7: time (ms) | save-checkpoint: 483.37 +7: iteration 21010/ 21553 | consumed samples: 5378560 | consumed tokens: 11015290880 | elapsed time per iteration (s): 0.36 | learning rate: 2.029E-05 | global batch size: 256 | lm loss: 2.538111E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 704.150 | TFLOPs: 24.65 | +7: iteration 21020/ 21553 | consumed samples: 5381120 | consumed tokens: 11020533760 | elapsed time per iteration (s): 0.30 | learning rate: 2.028E-05 | global batch size: 256 | lm loss: 2.527905E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.983 | TFLOPs: 29.58 | +7: iteration 21030/ 21553 | consumed samples: 5383680 | consumed tokens: 11025776640 | elapsed time per iteration (s): 0.31 | learning rate: 2.027E-05 | global batch size: 256 | lm loss: 2.544790E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.586 | TFLOPs: 29.15 | +7: iteration 21040/ 21553 | consumed samples: 5386240 | consumed tokens: 11031019520 | elapsed time per iteration (s): 0.31 | learning rate: 2.026E-05 | global batch size: 256 | lm loss: 2.499258E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.570 | TFLOPs: 29.36 | +7: iteration 21050/ 21553 | consumed samples: 5388800 | consumed tokens: 11036262400 | elapsed time per iteration (s): 0.30 | learning rate: 2.025E-05 | global batch size: 256 | lm loss: 2.516907E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.663 | TFLOPs: 29.85 | +7: iteration 21060/ 21553 | consumed samples: 5391360 | consumed tokens: 11041505280 | elapsed time per iteration (s): 0.30 | learning rate: 2.024E-05 | global batch size: 256 | lm loss: 2.544616E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.764 | TFLOPs: 29.64 | +7: iteration 21070/ 21553 | consumed samples: 5393920 | consumed tokens: 11046748160 | elapsed time per iteration (s): 0.30 | learning rate: 2.023E-05 | global batch size: 256 | lm loss: 2.496880E+00 | grad norm: 0.292 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.854 | TFLOPs: 30.00 | +7: iteration 21080/ 21553 | consumed samples: 5396480 | consumed tokens: 11051991040 | elapsed time per iteration (s): 0.31 | learning rate: 2.022E-05 | global batch size: 256 | lm loss: 2.490743E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.925 | TFLOPs: 29.30 | +7: iteration 21090/ 21553 | consumed samples: 5399040 | consumed tokens: 11057233920 | elapsed time per iteration (s): 0.31 | learning rate: 2.021E-05 | global batch size: 256 | lm loss: 2.496221E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.885 | TFLOPs: 29.16 | +7: iteration 21100/ 21553 | consumed samples: 5401600 | consumed tokens: 11062476800 | elapsed time per iteration (s): 0.31 | learning rate: 2.020E-05 | global batch size: 256 | lm loss: 2.499943E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.892 | TFLOPs: 29.05 | +7: iteration 21110/ 21553 | consumed samples: 5404160 | consumed tokens: 11067719680 | elapsed time per iteration (s): 0.30 | learning rate: 2.019E-05 | global batch size: 256 | lm loss: 2.537133E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.915 | TFLOPs: 29.58 | +7: iteration 21120/ 21553 | consumed samples: 5406720 | consumed tokens: 11072962560 | elapsed time per iteration (s): 0.30 | learning rate: 2.018E-05 | global batch size: 256 | lm loss: 2.535813E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.752 | TFLOPs: 29.68 | +7: iteration 21130/ 21553 | consumed samples: 5409280 | consumed tokens: 11078205440 | elapsed time per iteration (s): 0.30 | learning rate: 2.017E-05 | global batch size: 256 | lm loss: 2.497601E+00 | grad norm: 0.273 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.288 | TFLOPs: 29.87 | +7: iteration 21140/ 21553 | consumed samples: 5411840 | consumed tokens: 11083448320 | elapsed time per iteration (s): 0.31 | learning rate: 2.017E-05 | global batch size: 256 | lm loss: 2.514046E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.264 | TFLOPs: 29.35 | +7: iteration 21150/ 21553 | consumed samples: 5414400 | consumed tokens: 11088691200 | elapsed time per iteration (s): 0.31 | learning rate: 2.016E-05 | global batch size: 256 | lm loss: 2.533695E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.907 | TFLOPs: 29.37 | +7: iteration 21160/ 21553 | consumed samples: 5416960 | consumed tokens: 11093934080 | elapsed time per iteration (s): 0.31 | learning rate: 2.015E-05 | global batch size: 256 | lm loss: 2.518883E+00 | grad norm: 0.298 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.641 | TFLOPs: 28.76 | +7: iteration 21170/ 21553 | consumed samples: 5419520 | consumed tokens: 11099176960 | elapsed time per iteration (s): 0.31 | learning rate: 2.014E-05 | global batch size: 256 | lm loss: 2.498919E+00 | grad norm: 0.286 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.966 | TFLOPs: 28.67 | +7: iteration 21180/ 21553 | consumed samples: 5422080 | consumed tokens: 11104419840 | elapsed time per iteration (s): 0.30 | learning rate: 2.014E-05 | global batch size: 256 | lm loss: 2.496676E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.677 | TFLOPs: 29.64 | +7: iteration 21190/ 21553 | consumed samples: 5424640 | consumed tokens: 11109662720 | elapsed time per iteration (s): 0.30 | learning rate: 2.013E-05 | global batch size: 256 | lm loss: 2.516395E+00 | grad norm: 0.275 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.376 | TFLOPs: 29.94 | +7: iteration 21200/ 21553 | consumed samples: 5427200 | consumed tokens: 11114905600 | elapsed time per iteration (s): 0.31 | learning rate: 2.012E-05 | global batch size: 256 | lm loss: 2.503548E+00 | grad norm: 0.300 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.190 | TFLOPs: 29.38 | +7: iteration 21210/ 21553 | consumed samples: 5429760 | consumed tokens: 11120148480 | elapsed time per iteration (s): 0.31 | learning rate: 2.011E-05 | global batch size: 256 | lm loss: 2.490578E+00 | grad norm: 0.271 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.622 | TFLOPs: 28.73 | +7: iteration 21220/ 21553 | consumed samples: 5432320 | consumed tokens: 11125391360 | elapsed time per iteration (s): 0.30 | learning rate: 2.011E-05 | global batch size: 256 | lm loss: 2.510207E+00 | grad norm: 0.270 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.271 | TFLOPs: 29.49 | +7: iteration 21230/ 21553 | consumed samples: 5434880 | consumed tokens: 11130634240 | elapsed time per iteration (s): 0.30 | learning rate: 2.010E-05 | global batch size: 256 | lm loss: 2.492228E+00 | grad norm: 0.294 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.322 | TFLOPs: 29.42 | +7: iteration 21240/ 21553 | consumed samples: 5437440 | consumed tokens: 11135877120 | elapsed time per iteration (s): 0.31 | learning rate: 2.010E-05 | global batch size: 256 | lm loss: 2.476948E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.295 | TFLOPs: 29.03 | +7: iteration 21250/ 21553 | consumed samples: 5440000 | consumed tokens: 11141120000 | elapsed time per iteration (s): 0.30 | learning rate: 2.009E-05 | global batch size: 256 | lm loss: 2.548515E+00 | grad norm: 0.290 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.911 | TFLOPs: 29.47 | +7: iteration 21260/ 21553 | consumed samples: 5442560 | consumed tokens: 11146362880 | elapsed time per iteration (s): 0.31 | learning rate: 2.008E-05 | global batch size: 256 | lm loss: 2.520410E+00 | grad norm: 0.279 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.037 | TFLOPs: 28.74 | +7: iteration 21270/ 21553 | consumed samples: 5445120 | consumed tokens: 11151605760 | elapsed time per iteration (s): 0.30 | learning rate: 2.008E-05 | global batch size: 256 | lm loss: 2.493718E+00 | grad norm: 0.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.970 | TFLOPs: 29.58 | +7: iteration 21280/ 21553 | consumed samples: 5447680 | consumed tokens: 11156848640 | elapsed time per iteration (s): 0.31 | learning rate: 2.007E-05 | global batch size: 256 | lm loss: 2.510112E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.736 | TFLOPs: 29.15 | +7: iteration 21290/ 21553 | consumed samples: 5450240 | consumed tokens: 11162091520 | elapsed time per iteration (s): 0.30 | learning rate: 2.007E-05 | global batch size: 256 | lm loss: 2.560064E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.383 | TFLOPs: 29.42 | +7: iteration 21300/ 21553 | consumed samples: 5452800 | consumed tokens: 11167334400 | elapsed time per iteration (s): 0.31 | learning rate: 2.006E-05 | global batch size: 256 | lm loss: 2.509110E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.582 | TFLOPs: 29.25 | +7: iteration 21310/ 21553 | consumed samples: 5455360 | consumed tokens: 11172577280 | elapsed time per iteration (s): 0.31 | learning rate: 2.006E-05 | global batch size: 256 | lm loss: 2.500139E+00 | grad norm: 0.282 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.272 | TFLOPs: 29.31 | +7: iteration 21320/ 21553 | consumed samples: 5457920 | consumed tokens: 11177820160 | elapsed time per iteration (s): 0.32 | learning rate: 2.005E-05 | global batch size: 256 | lm loss: 2.521377E+00 | grad norm: 0.277 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 789.756 | TFLOPs: 27.65 | +7: iteration 21330/ 21553 | consumed samples: 5460480 | consumed tokens: 11183063040 | elapsed time per iteration (s): 0.30 | learning rate: 2.005E-05 | global batch size: 256 | lm loss: 2.495726E+00 | grad norm: 0.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.782 | TFLOPs: 29.57 | +7: iteration 21340/ 21553 | consumed samples: 5463040 | consumed tokens: 11188305920 | elapsed time per iteration (s): 0.31 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 2.508752E+00 | grad norm: 0.299 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.862 | TFLOPs: 29.09 | +7: iteration 21350/ 21553 | consumed samples: 5465600 | consumed tokens: 11193548800 | elapsed time per iteration (s): 0.30 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 2.534673E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.611 | TFLOPs: 29.50 | +7: iteration 21360/ 21553 | consumed samples: 5468160 | consumed tokens: 11198791680 | elapsed time per iteration (s): 0.30 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 2.534114E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.385 | TFLOPs: 29.45 | +7: iteration 21370/ 21553 | consumed samples: 5470720 | consumed tokens: 11204034560 | elapsed time per iteration (s): 0.31 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 2.537153E+00 | grad norm: 0.278 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.282 | TFLOPs: 28.89 | +7: iteration 21380/ 21553 | consumed samples: 5473280 | consumed tokens: 11209277440 | elapsed time per iteration (s): 0.31 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 2.479846E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.848 | TFLOPs: 29.30 | +7: iteration 21390/ 21553 | consumed samples: 5475840 | consumed tokens: 11214520320 | elapsed time per iteration (s): 0.30 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 2.518854E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.837 | TFLOPs: 29.79 | +7: iteration 21400/ 21553 | consumed samples: 5478400 | consumed tokens: 11219763200 | elapsed time per iteration (s): 0.31 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 2.514130E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.654 | TFLOPs: 29.22 | +7: iteration 21410/ 21553 | consumed samples: 5480960 | consumed tokens: 11225006080 | elapsed time per iteration (s): 0.30 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 2.538030E+00 | grad norm: 0.306 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.093 | TFLOPs: 29.83 | +7: iteration 21420/ 21553 | consumed samples: 5483520 | consumed tokens: 11230248960 | elapsed time per iteration (s): 0.30 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 2.517902E+00 | grad norm: 0.283 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.837 | TFLOPs: 29.82 | +7: iteration 21430/ 21553 | consumed samples: 5486080 | consumed tokens: 11235491840 | elapsed time per iteration (s): 0.31 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.547098E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.417 | TFLOPs: 29.07 | +7: iteration 21440/ 21553 | consumed samples: 5488640 | consumed tokens: 11240734720 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.532064E+00 | grad norm: 0.285 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.035 | TFLOPs: 29.51 | +7: iteration 21450/ 21553 | consumed samples: 5491200 | consumed tokens: 11245977600 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.523104E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.413 | TFLOPs: 29.39 | +7: iteration 21460/ 21553 | consumed samples: 5493760 | consumed tokens: 11251220480 | elapsed time per iteration (s): 0.31 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.499373E+00 | grad norm: 0.289 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 824.317 | TFLOPs: 28.86 | +7: iteration 21470/ 21553 | consumed samples: 5496320 | consumed tokens: 11256463360 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.501291E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.193 | TFLOPs: 29.73 | +7: iteration 21480/ 21553 | consumed samples: 5498880 | consumed tokens: 11261706240 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.508778E+00 | grad norm: 0.287 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.670 | TFLOPs: 29.53 | +7: iteration 21490/ 21553 | consumed samples: 5501440 | consumed tokens: 11266949120 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.542204E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.730 | TFLOPs: 29.78 | +7: iteration 21500/ 21553 | consumed samples: 5504000 | consumed tokens: 11272192000 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.517018E+00 | grad norm: 0.281 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.747 | TFLOPs: 28.91 | +7: iteration 21510/ 21553 | consumed samples: 5506560 | consumed tokens: 11277434880 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.480084E+00 | grad norm: 0.274 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.960 | TFLOPs: 29.61 | +7: iteration 21520/ 21553 | consumed samples: 5509120 | consumed tokens: 11282677760 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.526701E+00 | grad norm: 0.311 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.905 | TFLOPs: 28.95 | +7: iteration 21530/ 21553 | consumed samples: 5511680 | consumed tokens: 11287920640 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.486632E+00 | grad norm: 0.284 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.331 | TFLOPs: 29.31 | +7: iteration 21540/ 21553 | consumed samples: 5514240 | consumed tokens: 11293163520 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.470091E+00 | grad norm: 0.291 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.482 | TFLOPs: 29.60 | +7: iteration 21550/ 21553 | consumed samples: 5516800 | consumed tokens: 11298406400 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.528057E+00 | grad norm: 0.307 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.838 | TFLOPs: 29.68 | +0: [after training is done] datetime: 2022-11-25 19:22:53 +0: saving checkpoint at iteration 21553 to checkpoints_146m +7: ------------------------------------------------------------------------------------------------------------ +7: valid loss at the end of training for val data | lm loss value: 2.430283E+00 | lm loss PPL: 1.136210E+01 | +7: ------------------------------------------------------------------------------------------------------------ +0: [2022-11-25 19:22:54,073] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step21553 is begin to save! +0: [2022-11-25 19:22:54,076] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_01-model_00-model_states.pt... +0: [2022-11-25 19:22:54,168] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_01-model_00-model_states.pt. +0: [2022-11-25 19:22:54,169] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_03-model_00-model_states.pt... +0: [2022-11-25 19:22:54,184] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_03-model_00-model_states.pt. +0: [2022-11-25 19:22:54,185] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_04-model_00-model_states.pt... +0: [2022-11-25 19:22:54,199] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_04-model_00-model_states.pt. +0: [2022-11-25 19:22:54,200] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_05-model_00-model_states.pt... +0: [2022-11-25 19:22:54,214] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_05-model_00-model_states.pt. +0: [2022-11-25 19:22:54,215] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_06-model_00-model_states.pt... +0: [2022-11-25 19:22:54,229] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_06-model_00-model_states.pt. +0: [2022-11-25 19:22:54,230] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_07-model_00-model_states.pt... +0: [2022-11-25 19:22:54,244] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_07-model_00-model_states.pt. +0: [2022-11-25 19:22:54,244] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_08-model_00-model_states.pt... +0: [2022-11-25 19:22:54,259] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_08-model_00-model_states.pt. +0: [2022-11-25 19:22:54,259] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_09-model_00-model_states.pt... +0: [2022-11-25 19:22:54,275] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_09-model_00-model_states.pt. +0: [2022-11-25 19:22:54,276] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_10-model_00-model_states.pt... +0: [2022-11-25 19:22:54,290] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_10-model_00-model_states.pt. +0: [2022-11-25 19:22:54,291] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_11-model_00-model_states.pt... +0: [2022-11-25 19:22:54,305] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_11-model_00-model_states.pt. +0: [2022-11-25 19:22:54,305] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_12-model_00-model_states.pt... +0: [2022-11-25 19:22:54,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_12-model_00-model_states.pt. +0: [2022-11-25 19:22:54,320] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_13-model_00-model_states.pt... +0: [2022-11-25 19:22:54,335] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_13-model_00-model_states.pt. +0: [2022-11-25 19:22:54,335] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_14-model_00-model_states.pt... +0: [2022-11-25 19:22:54,350] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_14-model_00-model_states.pt. +0: [2022-11-25 19:22:54,350] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_15-model_00-model_states.pt... +0: [2022-11-25 19:22:54,365] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_15-model_00-model_states.pt. +0: [2022-11-25 19:22:54,365] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_16-model_00-model_states.pt... +0: [2022-11-25 19:22:54,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_16-model_00-model_states.pt. +0: [2022-11-25 19:22:54,380] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_17-model_00-model_states.pt... +0: [2022-11-25 19:22:54,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_17-model_00-model_states.pt. +0: [2022-11-25 19:22:54,394] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/layer_19-model_00-model_states.pt... +0: [2022-11-25 19:22:54,395] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/layer_19-model_00-model_states.pt. +0: [2022-11-25 19:22:54,396] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m/global_step21553/mp_rank_00_model_states.pt +0: [2022-11-25 19:22:54,396] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/mp_rank_00_model_states.pt... +0: [2022-11-25 19:22:54,398] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/mp_rank_00_model_states.pt. +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +0: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +1: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +6: [2022-11-25 19:22:54,416] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +3: [2022-11-25 19:22:54,446] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,446] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,446] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,450] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,450] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,450] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,450] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,450] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,450] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,451] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,451] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,452] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,452] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,452] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,452] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,452] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,452] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,453] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,453] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,453] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,453] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,453] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,454] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,454] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,455] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,455] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,455] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,456] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,455] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,456] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,456] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +7: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,456] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,456] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,457] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,457] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,458] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,458] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,458] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +2: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,459] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,460] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,460] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,460] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,459] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,460] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2022-11-25 19:22:54,460] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2022-11-25 19:22:54,460] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,463] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,463] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,463] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2022-11-25 19:22:54,464] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2022-11-25 19:22:54,464] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2022-11-25 19:22:54,464] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,468] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,468] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,468] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,468] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,468] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,469] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,469] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,469] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2022-11-25 19:22:54,469] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,469] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2022-11-25 19:22:54,469] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2022-11-25 19:22:54,469] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2022-11-25 19:22:54,477] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2022-11-25 19:22:54,477] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2022-11-25 19:22:54,477] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2022-11-25 19:22:54,482] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2022-11-25 19:22:54,482] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: successfully saved checkpoint at iteration 21553 to checkpoints_146m +7: ------------------------------------------------------------------------------------------------------------ +7: test loss at the end of training for test data | lm loss value: 2.427787E+00 | lm loss PPL: 1.133378E+01 | +7: ------------------------------------------------------------------------------------------------------------ +END 2072536: Fri Nov 25 19:23:06 EET 2022 diff --git a/146m14b14b/sbatch_146mpile.sh b/146m14b14b/sbatch_146mpile.sh new file mode 100644 index 0000000000000000000000000000000000000000..692c73b5852b29ca470c56c3ad761a3fe16e0a3c --- /dev/null +++ b/146m14b14b/sbatch_146mpile.sh @@ -0,0 +1,160 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p eap +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT +# Start from scratch +rm -rf "$CHECKPOINT_PATH" "$TENSORBOARD_PATH" + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=5_517_578 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 55_176 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --data-path $DATA_PATH \ + --data-impl mmap \ + --split 949,50,1 \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b14b/tensorboard/146m14b14bpile/events.out.tfevents.1669390252.nid005079.65599.0 b/146m14b14b/tensorboard/146m14b14bpile/events.out.tfevents.1669390252.nid005079.65599.0 new file mode 100644 index 0000000000000000000000000000000000000000..a0fad534d731ec1394c08bfe90354386c088f543 --- /dev/null +++ b/146m14b14b/tensorboard/146m14b14bpile/events.out.tfevents.1669390252.nid005079.65599.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b04ef845deaabb69587eb710b922607fae2a2da0c3668b72ae24c0bb671c778 +size 38441329 diff --git a/146m14b14b/tensorboard/lm1-146m-14b-inv-8e-4/2074592/events.out.tfevents.1669477427.nid005091.62835.0 b/146m14b14b/tensorboard/lm1-146m-14b-inv-8e-4/2074592/events.out.tfevents.1669477427.nid005091.62835.0 new file mode 100644 index 0000000000000000000000000000000000000000..e7f9274d56ad284442104c68775c3ac56baf47fa --- /dev/null +++ b/146m14b14b/tensorboard/lm1-146m-14b-inv-8e-4/2074592/events.out.tfevents.1669477427.nid005091.62835.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a13463f486f31e48756335d578787eda533a1536599708059443371dd7aa6e7 +size 38441658 diff --git a/146m14b14b/tensorboard/lm1-146m-14b-inv/2074187/events.out.tfevents.1669468145.nid005099.44584.0 b/146m14b14b/tensorboard/lm1-146m-14b-inv/2074187/events.out.tfevents.1669468145.nid005099.44584.0 new file mode 100644 index 0000000000000000000000000000000000000000..a5fa6a57326439a6a40edbb6e8965ed656593488 --- /dev/null +++ b/146m14b14b/tensorboard/lm1-146m-14b-inv/2074187/events.out.tfevents.1669468145.nid005099.44584.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7403c5c1475486de34f9bfb09183f028ff68ceb63b5347c601be48ba06b099b6 +size 38441643 diff --git a/146m14b14b/transformers/config.json b/146m14b14b/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639868e787b6f7945a258cf690dc9b5dba7be4a6 --- /dev/null +++ b/146m14b14b/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 768, "n_layer": 15, "n_head": 12, "n_inner": 3072, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/146m14b14b/transformers/pytorch_model.bin b/146m14b14b/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c5a813284e6ee4dda98a9d950f90c9f2d3c3cf32 --- /dev/null +++ b/146m14b14b/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41fbac2dbedc4925639adb2b76e0dc8d77403cc6a8cec0f48160dd69effa930 +size 418947557