Commit
•
2428de5
1
Parent(s):
62be092
Add
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
- 4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.json +1 -0
- 4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.json +1 -0
- 4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
- 4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.jsonl +3 -0
- 4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- 4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.json +133 -0
- 4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.json +133 -0
- 4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.json +133 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_0.json +32 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_0_lm-eval_global_step80108_2023-02-26-09-23-54_0shots_backup.json +32 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_1.json +32 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_1_lm-eval_global_step80108_2023-02-26-09-23-54_1shots_backup.json +32 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_2.json +16 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_2_lm-eval_global_step80108_2023-02-26-09-23-54_2shots_backup.json +16 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_3.json +16 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_3_lm-eval_global_step80108_2023-02-26-09-23-54_3shots_backup.json +16 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_4.json +11 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_4_lm-eval_global_step80108_2023-02-26-09-23-54_4shots_backup.json +11 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_5.json +11 -0
- 4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_5_lm-eval_global_step80108_2023-02-26-09-23-54_5shots_backup.json +11 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt +3 -0
- 4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 1.3562210829072388, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06363294711119734}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.11009512311383156, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020727913604168193}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.16243977504759669, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002228768903465126}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.12013040778127415, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017746386859163203}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.016237573503050346, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005791686929917277}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.027893395046500822, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001039576769487543}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.01952192502751282, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007045263606140751}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.10534269523932902, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018942650621943632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1589275088375994, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021856832752487143}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.1164630301972042, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016922702496737136}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.09138176673759738, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016976459528528838}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.13851750677859065, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001981721576730165}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1010326136243227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015162989057978518}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.16625735855106855, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024463651813182914}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.3230884270864929, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004359311826645138}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20872501710774183, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0025765264814385574}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.03520681144190837, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0013363705236252219}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07441126206565717, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027685648685041907}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04541415352191168, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016019047132235528}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.12527352638980324, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0018832520008059408}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.24649639309519972, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035131508278542895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.15771639364031548, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001968467651802171}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.12771639136222968, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001971864037023057}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2517880179492348, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003783114418327505}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16106544122061872, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002144088469758669}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.036660647928376, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0886671989991428}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed2/evaluation/generation/agg.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.13473651953646212, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018830579225946448}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.33263109371075555, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0043737298284748935}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18955906449065837, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0025437030392446553}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.02979980903695056, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010699057640840117}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07661622521673665, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00277147224226074}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04240941625507146, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015134151293483083}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10545507286869261, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014282925798028265}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2622174946749768, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034532690725241335}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.14860417887228614, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019427079439186818}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10644896438074114, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015535641404371687}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.26570822729983273, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003832781853814249}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1502154921172796, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021382854049990575}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.6913128558245667, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09521771990115542}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5acf9e32092153cf8f0af5c91853b34d9b865cdebba19399b5ad1f802f36bb55
|
3 |
+
size 4454140
|
4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.jsonl
CHANGED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed45a900f08a095774f6f224b39bd55c362922ea32bc8f0c0bd18cef7b82e73a
|
3 |
+
size 2788220
|
4b284b17bc4seed2/evaluation/generation/examples.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.jsonl
CHANGED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81016c90af5ffef779f0cb474c304822fd5faf4d585ef531961dff3876ada5a4
|
3 |
+
size 5106203
|
4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "e2e_nlg_cleaned",
|
5 |
+
"prompt_name": "generate_text_restaurant",
|
6 |
+
"bleu": 1.3562210829072388,
|
7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": null,
|
10 |
+
"bleu_stderr": 0.06363294711119734
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "e2e_nlg_cleaned",
|
14 |
+
"prompt_name": "generate_text_restaurant",
|
15 |
+
"rouge1_precision": 0.11009512311383156,
|
16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": null,
|
19 |
+
"rouge1_precision_stderr": 0.0020727913604168193
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "e2e_nlg_cleaned",
|
23 |
+
"prompt_name": "generate_text_restaurant",
|
24 |
+
"rouge1_recall": 0.16243977504759669,
|
25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": null,
|
28 |
+
"rouge1_recall_stderr": 0.002228768903465126
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "e2e_nlg_cleaned",
|
32 |
+
"prompt_name": "generate_text_restaurant",
|
33 |
+
"rouge1_fmeasure": 0.12013040778127415,
|
34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": null,
|
37 |
+
"rouge1_fmeasure_stderr": 0.0017746386859163203
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "e2e_nlg_cleaned",
|
41 |
+
"prompt_name": "generate_text_restaurant",
|
42 |
+
"rouge2_precision": 0.016237573503050346,
|
43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": null,
|
46 |
+
"rouge2_precision_stderr": 0.0005791686929917277
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "e2e_nlg_cleaned",
|
50 |
+
"prompt_name": "generate_text_restaurant",
|
51 |
+
"rouge2_recall": 0.027893395046500822,
|
52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": null,
|
55 |
+
"rouge2_recall_stderr": 0.001039576769487543
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "e2e_nlg_cleaned",
|
59 |
+
"prompt_name": "generate_text_restaurant",
|
60 |
+
"rouge2_fmeasure": 0.01952192502751282,
|
61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": null,
|
64 |
+
"rouge2_fmeasure_stderr": 0.0007045263606140751
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "e2e_nlg_cleaned",
|
68 |
+
"prompt_name": "generate_text_restaurant",
|
69 |
+
"rougeL_precision": 0.10534269523932902,
|
70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": null,
|
73 |
+
"rougeL_precision_stderr": 0.0018942650621943632
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "e2e_nlg_cleaned",
|
77 |
+
"prompt_name": "generate_text_restaurant",
|
78 |
+
"rougeL_recall": 0.1589275088375994,
|
79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": null,
|
82 |
+
"rougeL_recall_stderr": 0.0021856832752487143
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "e2e_nlg_cleaned",
|
86 |
+
"prompt_name": "generate_text_restaurant",
|
87 |
+
"rougeL_fmeasure": 0.1164630301972042,
|
88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": null,
|
91 |
+
"rougeL_fmeasure_stderr": 0.0016922702496737136
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "e2e_nlg_cleaned",
|
95 |
+
"prompt_name": "generate_text_restaurant",
|
96 |
+
"rougeLsum_precision": 0.09138176673759738,
|
97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": null,
|
100 |
+
"rougeLsum_precision_stderr": 0.0016976459528528838
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "e2e_nlg_cleaned",
|
104 |
+
"prompt_name": "generate_text_restaurant",
|
105 |
+
"rougeLsum_recall": 0.13851750677859065,
|
106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": null,
|
109 |
+
"rougeLsum_recall_stderr": 0.001981721576730165
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "e2e_nlg_cleaned",
|
113 |
+
"prompt_name": "generate_text_restaurant",
|
114 |
+
"rougeLsum_fmeasure": 0.1010326136243227,
|
115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": null,
|
118 |
+
"rougeLsum_fmeasure_stderr": 0.0015162989057978518
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 0,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_gem_xsum_article_DOC_summary_0.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.16625735855106855,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.0024463651813182914
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.3230884270864929,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.004359311826645138
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.20872501710774183,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.0025765264814385574
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.03520681144190837,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.0013363705236252219
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.07441126206565717,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 0.0027685648685041907
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.04541415352191168,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 0.0016019047132235528
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.12527352638980324,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0018832520008059408
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.24649639309519972,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.0035131508278542895
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.15771639364031548,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.001968467651802171
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.12771639136222968,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.001971864037023057
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.2517880179492348,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.003783114418327505
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.16106544122061872,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.002144088469758669
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 2.036660647928376,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 0.0886671989991428
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 0,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed2/evaluation/generation/slim.4b284b17bc4seed2_gem_xsum_article_DOC_summary_1.json
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": [
|
3 |
+
{
|
4 |
+
"task_name": "gem_xsum",
|
5 |
+
"prompt_name": "article_DOC_summary",
|
6 |
+
"rouge1_precision": 0.13473651953646212,
|
7 |
+
"dataset_path": "GEM/xsum",
|
8 |
+
"dataset_name": null,
|
9 |
+
"subset": "",
|
10 |
+
"rouge1_precision_stderr": 0.0018830579225946448
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"task_name": "gem_xsum",
|
14 |
+
"prompt_name": "article_DOC_summary",
|
15 |
+
"rouge1_recall": 0.33263109371075555,
|
16 |
+
"dataset_path": "GEM/xsum",
|
17 |
+
"dataset_name": null,
|
18 |
+
"subset": "",
|
19 |
+
"rouge1_recall_stderr": 0.0043737298284748935
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"task_name": "gem_xsum",
|
23 |
+
"prompt_name": "article_DOC_summary",
|
24 |
+
"rouge1_fmeasure": 0.18955906449065837,
|
25 |
+
"dataset_path": "GEM/xsum",
|
26 |
+
"dataset_name": null,
|
27 |
+
"subset": "",
|
28 |
+
"rouge1_fmeasure_stderr": 0.0025437030392446553
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"task_name": "gem_xsum",
|
32 |
+
"prompt_name": "article_DOC_summary",
|
33 |
+
"rouge2_precision": 0.02979980903695056,
|
34 |
+
"dataset_path": "GEM/xsum",
|
35 |
+
"dataset_name": null,
|
36 |
+
"subset": "",
|
37 |
+
"rouge2_precision_stderr": 0.0010699057640840117
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"task_name": "gem_xsum",
|
41 |
+
"prompt_name": "article_DOC_summary",
|
42 |
+
"rouge2_recall": 0.07661622521673665,
|
43 |
+
"dataset_path": "GEM/xsum",
|
44 |
+
"dataset_name": null,
|
45 |
+
"subset": "",
|
46 |
+
"rouge2_recall_stderr": 0.00277147224226074
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"task_name": "gem_xsum",
|
50 |
+
"prompt_name": "article_DOC_summary",
|
51 |
+
"rouge2_fmeasure": 0.04240941625507146,
|
52 |
+
"dataset_path": "GEM/xsum",
|
53 |
+
"dataset_name": null,
|
54 |
+
"subset": "",
|
55 |
+
"rouge2_fmeasure_stderr": 0.0015134151293483083
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"task_name": "gem_xsum",
|
59 |
+
"prompt_name": "article_DOC_summary",
|
60 |
+
"rougeL_precision": 0.10545507286869261,
|
61 |
+
"dataset_path": "GEM/xsum",
|
62 |
+
"dataset_name": null,
|
63 |
+
"subset": "",
|
64 |
+
"rougeL_precision_stderr": 0.0014282925798028265
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"task_name": "gem_xsum",
|
68 |
+
"prompt_name": "article_DOC_summary",
|
69 |
+
"rougeL_recall": 0.2622174946749768,
|
70 |
+
"dataset_path": "GEM/xsum",
|
71 |
+
"dataset_name": null,
|
72 |
+
"subset": "",
|
73 |
+
"rougeL_recall_stderr": 0.0034532690725241335
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"task_name": "gem_xsum",
|
77 |
+
"prompt_name": "article_DOC_summary",
|
78 |
+
"rougeL_fmeasure": 0.14860417887228614,
|
79 |
+
"dataset_path": "GEM/xsum",
|
80 |
+
"dataset_name": null,
|
81 |
+
"subset": "",
|
82 |
+
"rougeL_fmeasure_stderr": 0.0019427079439186818
|
83 |
+
},
|
84 |
+
{
|
85 |
+
"task_name": "gem_xsum",
|
86 |
+
"prompt_name": "article_DOC_summary",
|
87 |
+
"rougeLsum_precision": 0.10644896438074114,
|
88 |
+
"dataset_path": "GEM/xsum",
|
89 |
+
"dataset_name": null,
|
90 |
+
"subset": "",
|
91 |
+
"rougeLsum_precision_stderr": 0.0015535641404371687
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"task_name": "gem_xsum",
|
95 |
+
"prompt_name": "article_DOC_summary",
|
96 |
+
"rougeLsum_recall": 0.26570822729983273,
|
97 |
+
"dataset_path": "GEM/xsum",
|
98 |
+
"dataset_name": null,
|
99 |
+
"subset": "",
|
100 |
+
"rougeLsum_recall_stderr": 0.003832781853814249
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"task_name": "gem_xsum",
|
104 |
+
"prompt_name": "article_DOC_summary",
|
105 |
+
"rougeLsum_fmeasure": 0.1502154921172796,
|
106 |
+
"dataset_path": "GEM/xsum",
|
107 |
+
"dataset_name": null,
|
108 |
+
"subset": "",
|
109 |
+
"rougeLsum_fmeasure_stderr": 0.0021382854049990575
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"task_name": "gem_xsum",
|
113 |
+
"prompt_name": "article_DOC_summary",
|
114 |
+
"bleu": 1.6913128558245667,
|
115 |
+
"dataset_path": "GEM/xsum",
|
116 |
+
"dataset_name": null,
|
117 |
+
"subset": "",
|
118 |
+
"bleu_stderr": 0.09521771990115542
|
119 |
+
}
|
120 |
+
],
|
121 |
+
"config": {
|
122 |
+
"model": "hf-causal",
|
123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-c4seeds/4b284b17bc4seed2/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
124 |
+
"task_args": "",
|
125 |
+
"num_fewshot": 1,
|
126 |
+
"batch_size": 16,
|
127 |
+
"device": "cuda",
|
128 |
+
"use_cache": false,
|
129 |
+
"limit": 3000,
|
130 |
+
"bootstrap_iters": 10,
|
131 |
+
"seed": 1234
|
132 |
+
}
|
133 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_0.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.332,
|
5 |
+
"acc_stderr": 0.014899597242811482
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.329,
|
9 |
+
"acc_stderr": 0.014865395385928366
|
10 |
+
},
|
11 |
+
"anli_r3": {
|
12 |
+
"acc": 0.34,
|
13 |
+
"acc_stderr": 0.013680495725767789
|
14 |
+
},
|
15 |
+
"cb": {
|
16 |
+
"acc": 0.39285714285714285,
|
17 |
+
"acc_stderr": 0.0658538889806635,
|
18 |
+
"f1": 0.20817204301075268
|
19 |
+
},
|
20 |
+
"copa": {
|
21 |
+
"acc": 0.78,
|
22 |
+
"acc_stderr": 0.04163331998932261
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"versions": {
|
26 |
+
"anli_r1": 0,
|
27 |
+
"anli_r2": 0,
|
28 |
+
"anli_r3": 0,
|
29 |
+
"cb": 1,
|
30 |
+
"copa": 0
|
31 |
+
}
|
32 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_0_lm-eval_global_step80108_2023-02-26-09-23-54_0shots_backup.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.332,
|
5 |
+
"acc_stderr": 0.014899597242811482
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.329,
|
9 |
+
"acc_stderr": 0.014865395385928366
|
10 |
+
},
|
11 |
+
"anli_r3": {
|
12 |
+
"acc": 0.34,
|
13 |
+
"acc_stderr": 0.013680495725767789
|
14 |
+
},
|
15 |
+
"cb": {
|
16 |
+
"acc": 0.39285714285714285,
|
17 |
+
"acc_stderr": 0.0658538889806635,
|
18 |
+
"f1": 0.20817204301075268
|
19 |
+
},
|
20 |
+
"copa": {
|
21 |
+
"acc": 0.78,
|
22 |
+
"acc_stderr": 0.04163331998932261
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"versions": {
|
26 |
+
"anli_r1": 0,
|
27 |
+
"anli_r2": 0,
|
28 |
+
"anli_r3": 0,
|
29 |
+
"cb": 1,
|
30 |
+
"copa": 0
|
31 |
+
}
|
32 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_1.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.31,
|
5 |
+
"acc_stderr": 0.0146326386586329
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.324,
|
9 |
+
"acc_stderr": 0.014806864733738852
|
10 |
+
},
|
11 |
+
"anli_r3": {
|
12 |
+
"acc": 0.3416666666666667,
|
13 |
+
"acc_stderr": 0.013696658778002517
|
14 |
+
},
|
15 |
+
"cb": {
|
16 |
+
"acc": 0.5,
|
17 |
+
"acc_stderr": 0.06741998624632421,
|
18 |
+
"f1": 0.33520950594121324
|
19 |
+
},
|
20 |
+
"copa": {
|
21 |
+
"acc": 0.76,
|
22 |
+
"acc_stderr": 0.04292346959909283
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"versions": {
|
26 |
+
"anli_r1": 0,
|
27 |
+
"anli_r2": 0,
|
28 |
+
"anli_r3": 0,
|
29 |
+
"cb": 1,
|
30 |
+
"copa": 0
|
31 |
+
}
|
32 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_1_lm-eval_global_step80108_2023-02-26-09-23-54_1shots_backup.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.31,
|
5 |
+
"acc_stderr": 0.0146326386586329
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.324,
|
9 |
+
"acc_stderr": 0.014806864733738852
|
10 |
+
},
|
11 |
+
"anli_r3": {
|
12 |
+
"acc": 0.3416666666666667,
|
13 |
+
"acc_stderr": 0.013696658778002517
|
14 |
+
},
|
15 |
+
"cb": {
|
16 |
+
"acc": 0.5,
|
17 |
+
"acc_stderr": 0.06741998624632421,
|
18 |
+
"f1": 0.33520950594121324
|
19 |
+
},
|
20 |
+
"copa": {
|
21 |
+
"acc": 0.76,
|
22 |
+
"acc_stderr": 0.04292346959909283
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"versions": {
|
26 |
+
"anli_r1": 0,
|
27 |
+
"anli_r2": 0,
|
28 |
+
"anli_r3": 0,
|
29 |
+
"cb": 1,
|
30 |
+
"copa": 0
|
31 |
+
}
|
32 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_2.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.34,
|
5 |
+
"acc_stderr": 0.014987482264363937
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.321,
|
9 |
+
"acc_stderr": 0.014770821817934644
|
10 |
+
}
|
11 |
+
},
|
12 |
+
"versions": {
|
13 |
+
"anli_r1": 0,
|
14 |
+
"anli_r2": 0
|
15 |
+
}
|
16 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_2_lm-eval_global_step80108_2023-02-26-09-23-54_2shots_backup.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.34,
|
5 |
+
"acc_stderr": 0.014987482264363937
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.321,
|
9 |
+
"acc_stderr": 0.014770821817934644
|
10 |
+
}
|
11 |
+
},
|
12 |
+
"versions": {
|
13 |
+
"anli_r1": 0,
|
14 |
+
"anli_r2": 0
|
15 |
+
}
|
16 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_3.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.329,
|
5 |
+
"acc_stderr": 0.014865395385928366
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.327,
|
9 |
+
"acc_stderr": 0.01484221315341125
|
10 |
+
}
|
11 |
+
},
|
12 |
+
"versions": {
|
13 |
+
"anli_r1": 0,
|
14 |
+
"anli_r2": 0
|
15 |
+
}
|
16 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_3_lm-eval_global_step80108_2023-02-26-09-23-54_3shots_backup.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.329,
|
5 |
+
"acc_stderr": 0.014865395385928366
|
6 |
+
},
|
7 |
+
"anli_r2": {
|
8 |
+
"acc": 0.327,
|
9 |
+
"acc_stderr": 0.01484221315341125
|
10 |
+
}
|
11 |
+
},
|
12 |
+
"versions": {
|
13 |
+
"anli_r1": 0,
|
14 |
+
"anli_r2": 0
|
15 |
+
}
|
16 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_4.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.332,
|
5 |
+
"acc_stderr": 0.014899597242811487
|
6 |
+
}
|
7 |
+
},
|
8 |
+
"versions": {
|
9 |
+
"anli_r1": 0
|
10 |
+
}
|
11 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_4_lm-eval_global_step80108_2023-02-26-09-23-54_4shots_backup.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.332,
|
5 |
+
"acc_stderr": 0.014899597242811487
|
6 |
+
}
|
7 |
+
},
|
8 |
+
"versions": {
|
9 |
+
"anli_r1": 0
|
10 |
+
}
|
11 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_5.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.342,
|
5 |
+
"acc_stderr": 0.01500870618212173
|
6 |
+
}
|
7 |
+
},
|
8 |
+
"versions": {
|
9 |
+
"anli_r1": 0
|
10 |
+
}
|
11 |
+
}
|
4b284b17bc4seed2/evaluation/rankeval/4b284b17bc4seed2_5_lm-eval_global_step80108_2023-02-26-09-23-54_5shots_backup.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"anli_r1": {
|
4 |
+
"acc": 0.342,
|
5 |
+
"acc_stderr": 0.01500870618212173
|
6 |
+
}
|
7 |
+
},
|
8 |
+
"versions": {
|
9 |
+
"anli_r1": 0
|
10 |
+
}
|
11 |
+
}
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d19f4f6717cfa11f4089167db24b0b29e1bb2817134064bb874840168067714c
|
3 |
+
size 199058647
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:297b32a72bba9aa49c9537d880b9787a5a35eb47181f7deefe32272e4465cef4
|
3 |
+
size 199058647
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea2e6eedb9bc9bd97ec987fb7464df3040b9d475ee75ef384a04ca6722ab85c7
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18e485b0b011011d03e037d0907626277584d572ad497e420af521594b14d312
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd8c889e122a013bb966eb95ba0f0f8234ff81cf1364b05640560685be06e040
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01595dc0b077c8e72f618d8fd19c9268d9c3a26f2e6b17a1bd3e3ec765d46b09
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a4f5a39d584e098799a59a3372814f01ec9420c4ba42739b075b1c068033dd6
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79c622f72f8dd7644bc87c234d480488be11ba94b1ba0ef4aa7ad95ff3cbc68d
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f90660386a82d21ec7cf70295a7c3a408a41512f1f4b4024778660c298b13888
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02315f4f1c48bbbbd3d3cccd3ad5aea92f6fd004b6d5bd191fe78c442895bb48
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48e0ad698781c5c8c63cefc365a36a9adc5156770cf5181b65b0ec09228fd37d
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2774f3874f8dba8200a6104a5debf22a883f11e0120b4c60666423653e01c6e
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28fdd8002c30266f396cb23ed293aab7ec62d6c1470841c93e06dcc8a3db3c24
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff146a02f2012093b98d6a4d6b43107582c7d22febd5f3bcb987428070b3575f
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e524180750e5069192c18ed03de65c1423ef06d21ade032cbb2a284fb351d745
|
3 |
+
size 199058669
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c84e8469b70d872f85a6dee37e0f435241473def04ecd19c43154d2f3c4b4f0
|
3 |
+
size 199058669
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63eea16c8b46dcac2761d4c1815598f7c3f06ffaeb9414179ad3702a9119fb31
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c34b2fe2d547558d51c69b8a2beb3e108bed088e2c1b34c9c0917272aba4b797
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd152095fe9200af67618d01205c2b6750cbc6397927eefc3239e567229a894d
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33e26143bcc52fd589e27998e0770905dd02c6d0e0688f40e37ce8eb05af2d0b
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a5f77213756b50fa3818c545bb0408288e9716392636ef535cecf64bb4a268c
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2712850a994a27a466457b619ca2b2424e662a10ff15fe9a87f4384de75ec13b
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f757ac808f925c5090c7eea98f335b981638738c829df8cef1dee8c9bbdb720
|
3 |
+
size 199058658
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dea9ecc09ae6dae0b1e7b1373c745e329724bd0b9ff2a07af3b4ff4451f46c1
|
3 |
+
size 199058658
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f213727a59089773f69e4a576a4fb43a8ed8fe2f53d06ba127e533436aed464
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc5d89562938d153423a014a48bdf96cca8a240121e59262de84635df48e1de3
|
3 |
+
size 199058733
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81e2b99c559d05f4acab047632c8aa03a50d4fab2a8546054d856bc676aa3623
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2dfb6e3f6beaee5bd8a62b45042d3f5b312d0544b917a136da14291a94ee3ae
|
3 |
+
size 199058797
|
4b284b17bc4seed2/global_step80108/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ec70fb5940e8e482dfad0d1383c93027356292f24b5b11d0299e6f832cc6603
|
3 |
+
size 199058605
|