diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json diff --git a/evaluation_l1/anli/dev_r1/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r1/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r1/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r1/justified_in_saying/results.json diff --git a/evaluation_l1/anli/dev_r2/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r2/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r2/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r2/justified_in_saying/results.json diff --git a/evaluation_l1/anli/dev_r3/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r3/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r3/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/anli/dev_r3/justified_in_saying/results.json diff --git a/evaluation_l1/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_l1/merged.csv similarity index 100% rename from evaluation_l1/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_l1/merged.csv diff --git a/evaluation_l1/merged.json b/evaluation_bloomz-7b1-p3/evaluation_l1/merged.json similarity index 100% rename from evaluation_l1/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/merged.json diff --git a/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Answer_Given_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json diff --git a/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Generate_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Generate_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/super_glue/cb/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/GPT-3_style/results.json diff --git a/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json diff --git a/evaluation_l1/super_glue/cb/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/can_we_infer/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/can_we_infer/results.json diff --git a/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/super_glue/cb/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/cb/justified_in_saying/results.json diff --git "a/evaluation_l1/super_glue/copa/C1_or_C2?_premise,_so_because\342\200\246/results.json" "b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/C1_or_C2?_premise,_so_because\342\200\246/results.json" similarity index 100% rename from "evaluation_l1/super_glue/copa/C1_or_C2?_premise,_so_because\342\200\246/results.json" rename to "evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/C1_or_C2?_premise,_so_because\342\200\246/results.json" diff --git a/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/super_glue/copa/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/best_option/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/best_option/results.json diff --git a/evaluation_l1/super_glue/copa/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/cause_effect/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/cause_effect/results.json diff --git a/evaluation_l1/super_glue/copa/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/i_am_hesitating/results.json diff --git a/evaluation_l1/super_glue/copa/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/copa/plausible_alternatives/results.json diff --git a/evaluation_l1/super_glue/rte/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/GPT-3_style/results.json diff --git a/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json diff --git a/evaluation_l1/super_glue/rte/does_it_follow_that/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/does_it_follow_that/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/does_it_follow_that/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/does_it_follow_that/results.json diff --git a/evaluation_l1/super_glue/rte/guaranteed_true/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/guaranteed_true/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/guaranteed_true/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/guaranteed_true/results.json diff --git a/evaluation_l1/super_glue/rte/should_assume/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/should_assume/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/should_assume/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/super_glue/rte/should_assume/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/Replace/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/Replace/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/Replace/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/Replace/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/stand_for/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json diff --git a/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/id/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/id/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/best_option/results.json diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/id/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/cause_effect/results.json diff --git a/evaluation_l1/xcopa/id/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/id/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/id/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/id/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/sw/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/best_option/results.json diff --git a/evaluation_l1/xcopa/sw/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/cause_effect/results.json diff --git a/evaluation_l1/xcopa/sw/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/sw/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/sw/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/ta/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/best_option/results.json diff --git a/evaluation_l1/xcopa/ta/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/cause_effect/results.json diff --git a/evaluation_l1/xcopa/ta/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/ta/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/ta/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/vi/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/best_option/results.json diff --git a/evaluation_l1/xcopa/vi/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/cause_effect/results.json diff --git a/evaluation_l1/xcopa/vi/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/vi/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/vi/plausible_alternatives/results.json diff --git "a/evaluation_l1/xcopa/zh/C1_or_C2?_premise,_so_because\342\200\246/results.json" "b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/C1_or_C2?_premise,_so_because\342\200\246/results.json" similarity index 100% rename from "evaluation_l1/xcopa/zh/C1_or_C2?_premise,_so_because\342\200\246/results.json" rename to "evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/C1_or_C2?_premise,_so_because\342\200\246/results.json" diff --git a/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/zh/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/best_option/results.json diff --git a/evaluation_l1/xcopa/zh/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/cause_effect/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/cause_effect/results.json diff --git a/evaluation_l1/xcopa/zh/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/i_am_hesitating/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/plausible_alternatives/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xcopa/zh/plausible_alternatives/results.json diff --git a/evaluation_l1/xnli/ar/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/ar/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/ar/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/ar/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/ar/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/can_we_infer/results.json diff --git a/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/ar/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/ar/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ar/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/en/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/en/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/en/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/en/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/en/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/can_we_infer/results.json diff --git a/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/en/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/en/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/en/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/es/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/es/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/es/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/es/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/can_we_infer/results.json diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/es/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/es/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/fr/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/fr/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/fr/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/fr/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/fr/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/can_we_infer/results.json diff --git a/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/fr/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/fr/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/fr/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/hi/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/hi/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/hi/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/hi/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/hi/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/can_we_infer/results.json diff --git a/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/hi/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/hi/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/hi/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/sw/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/sw/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/sw/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/sw/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/sw/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/can_we_infer/results.json diff --git a/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/sw/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/sw/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/sw/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/ur/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/ur/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/ur/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/ur/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/ur/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/can_we_infer/results.json diff --git a/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/ur/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/ur/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/ur/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/vi/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/vi/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/vi/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/vi/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/vi/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/can_we_infer/results.json diff --git a/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/vi/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/vi/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/vi/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/zh/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/zh/GPT-3_style/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/zh/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/zh/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/zh/can_we_infer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/can_we_infer/results.json diff --git a/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/zh/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/zh/justified_in_saying/results.json rename to evaluation_bloomz-7b1-p3/evaluation_l1/xnli/zh/justified_in_saying/results.json diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2fa1366baa3f3164f7339758abe2f9aed3a0b100 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.5056254136333554 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ed1a670ba83badcd3993d8ed19a560a9bb79dc12 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.5069490403706155 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d599c8d4f1696a1360aea911b16747e600fbb9a0 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.4784910655195235 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bfefdb1a61e19f06d803ac4f3173be71f1ba814c --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.5102581072137657 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..08300d74c53a528cb926b0f8565a58559d65f320 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.5062872270019855 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b2f5de0276da63d72fd049a829ed85ae2efa6889 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.6406353408338848 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..aed3c1e40352d4994d7453b4dd968ebf10845edb --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.6644606221045665 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ced50bd5cfd4a894c479450459f0b7c7a1aae38f --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.514890800794176 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7d783c6b5e0ad77de67d495fbbd7ed5401f56c3 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.6393117140966248 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f011df8e923160b1d0b4f2b30824d7d15ad2fc52 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.6545334215751158 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2dbc88cc8ef1dc56d022857658461e92774126c4 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.502606882168926 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..743e97428106c87b3450709518f89196ddf98f47 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..50ee24d110ddd12a3ae4a2a93a8648d7eba5c2e9 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..88e21e35c477d52a19e8b476fe4a37d723fca060 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..66ad24def026b4b5d837bcc53d0b3a0a3d8fa289 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.57 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7a95b8fba81c6b730aa4734765859fbca5ef18e --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/et/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.55 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..90ff58360e77ecd528951596b88a276d4256bd87 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.51 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..60d6cf6cf44949d0030d642395a4b2849491cb90 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..97837e87e8021496728fd751db699446c59c5f3c --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.55 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2567e741b7507173e8c83a77324aad31bb04746a --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.51 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a729197a1e501b2b9ffacf8f73611086cde3548c --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/ht/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d2850ee91e4f761bf5c75b87f900e9390e134166 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.57 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..07549f5a813dff52d196401e4a351cc96449ac28 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..396c0335427a1ae8d0cde59115bd8d70ff7349f9 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.54 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..46df8997029acb9f470ac96a02070004ba2a5286 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.57 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e5ea445da5a507dc6d9500868174b0dec35636 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/it/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.6 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a51e6d22714a31a5a1908d96285a6ca610c4bd5d --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1ef876898c99a5d7e1b759a0cbf4e95aee579ed --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..28d4308b5063689193a6a3f9c5fc3a0408b32424 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.5 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6c6ea456bd7d217ddeb9eff26d27f189e4fb8f3d --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.48 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8cdca1ceb992db7de187ff7b3b074910cdd2aa2f --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/qu/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.54 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7451e8603b3a60fdf5d8108622cf549c493f677f --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.55 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fe7ad7f0511b66e8ecd37a91d3a9d91a5c33a54e --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.48 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/cause_effect/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..12842318c0ffe852c5ee563b64e06b3ad44a8671 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.53 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/i_am_hesitating/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..69e4de4278c8db371a286dcb8797e23cecdd18a7 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.54 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/plausible_alternatives/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dce2013b5f9cf0bdc701885e71423d775fbc15a6 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xcopa/tr/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3dcfb826a65b7c2f157d69b422611092b936e879 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.43775100401606426 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a662fc6a2c50116b2d1e2dcb7423e576f3f0cc24 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.36666666666666664 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..27aadc38a6ab9100f1d71e4bbadc98d6ad4a7047 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4397590361445783 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a8966b90d5fb1fb8a781b652138e303bad545e18 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.41646586345381525 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..756dbc8dd032b05a68ac5583c50f042db39a6b81 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/bg/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4108433734939759 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..76cbb91a3bf0b9ca6eae121f4c9a0ff713c72ab5 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.470281124497992 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1accdf10bd9bef5f6331e8324f87c8b4c81e124e --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3586345381526104 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ef2e3c981169e1f2053d385dc4ddaad69eeb8d89 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.44016064257028115 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..50b78cb97a5cdcb5f92230ae2cf88eefa983947a --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3538152610441767 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..55a8940192489542231c0c20f23b05d72cbb8ece --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/de/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.41847389558232934 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac4efe3ed769910cf27f922876f00a0e9c1d6cb5 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.41887550200803214 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..748a4284d39fb8cb62112ca3ff48a647b7b37e72 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3598393574297189 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3d5979159003b6c3d8226188ca49d309f40172 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4108433734939759 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7293712ef65ebb9803c82b7c3646dee70b678674 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.40682730923694777 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f1887c483c1f623abaafe16bb07fbad133cf96f8 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/el/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.3823293172690763 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e33ac69df066aa1e090131f923f6039723042f7e --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.46546184738955826 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f3d789e99476f8686f359a51ecd1008474cd8de --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3819277108433735 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5d4131df75bb7dcb68dac9c7c67908952a83c9e --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4614457831325301 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d1aa37e43703e9053bde8f680afda62a41b1f86 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.42208835341365464 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7266f29d83a2e4326b8a3dd6c0720c3e962e4201 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/ru/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4389558232931727 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ec3de913e144b17fb1fc084a4ed740a69e091f9 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.41646586345381525 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..30f0cf4f14c4d85ff945c5e915f5d5b4f395ee2b --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3224899598393574 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a5f80414456b28a5235a9fa5e1fb6d0f45db60fe --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.43172690763052207 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..983ee3c8ea127009951e5a1b38ac1e4489e4ea2b --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.42730923694779116 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dde1f5287ebbb7147456c6267bf7925bd2c10475 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/th/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.40401606425702813 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/GPT-3_style/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..72afbabffd057a9fed4e89ee9d6967f072b49d83 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.40240963855421685 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ccab90137c2c566b436fab44866f572304fa8fe --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.351004016064257 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/can_we_infer/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3e0b43397cdd6c73fd2d439d9ed02dd40b6a8311 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.40441767068273093 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..92190e9f7a2c3f7884d46db7161c457fdc5d4f86 --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3678714859437751 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/justified_in_saying/results.json b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2567b542af12adc1a575bf13bc3a6db825170d4d --- /dev/null +++ b/evaluation_bloomz-7b1-p3/evaluation_l2/xnli/tr/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.38313253012048193 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1-p3/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/amazon_reviews_multi/en/prompt_body_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_body_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/en/prompt_body_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_body_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/en/prompt_review_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_review_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/en/prompt_review_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_review_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/en/prompt_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/en/prompt_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/en/prompt_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/es/prompt_body_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_body_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/es/prompt_body_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_body_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/es/prompt_review_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_review_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/es/prompt_review_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_review_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/es/prompt_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/es/prompt_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/es/prompt_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/fr/prompt_review_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_review_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/fr/prompt_review_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_review_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/fr/prompt_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/fr/prompt_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/fr/prompt_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/zh/prompt_review_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_review_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/zh/prompt_review_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_review_to_star/results.json diff --git a/evaluation_val/amazon_reviews_multi/zh/prompt_title_to_star/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_title_to_star/results.json similarity index 100% rename from evaluation_val/amazon_reviews_multi/zh/prompt_title_to_star/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/amazon_reviews_multi/zh/prompt_title_to_star/results.json diff --git a/evaluation_val/aqua_rat/raw/Answer_questions_from_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/Answer_questions_from_options/results.json similarity index 100% rename from evaluation_val/aqua_rat/raw/Answer_questions_from_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/Answer_questions_from_options/results.json diff --git a/evaluation_val/aqua_rat/raw/answer_quiz/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/answer_quiz/results.json similarity index 100% rename from evaluation_val/aqua_rat/raw/answer_quiz/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/answer_quiz/results.json diff --git a/evaluation_val/aqua_rat/raw/select_the_best_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/select_the_best_option/results.json similarity index 100% rename from evaluation_val/aqua_rat/raw/select_the_best_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/aqua_rat/raw/select_the_best_option/results.json diff --git a/evaluation_val/art/choose_hypothesis/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis/results.json similarity index 100% rename from evaluation_val/art/choose_hypothesis/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis/results.json diff --git a/evaluation_val/art/choose_hypothesis_believable/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_believable/results.json similarity index 100% rename from evaluation_val/art/choose_hypothesis_believable/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_believable/results.json diff --git a/evaluation_val/art/choose_hypothesis_desc/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_desc/results.json similarity index 100% rename from evaluation_val/art/choose_hypothesis_desc/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_desc/results.json diff --git a/evaluation_val/art/choose_hypothesis_likely/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_likely/results.json similarity index 100% rename from evaluation_val/art/choose_hypothesis_likely/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_likely/results.json diff --git a/evaluation_val/art/choose_hypothesis_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_options/results.json similarity index 100% rename from evaluation_val/art/choose_hypothesis_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/art/choose_hypothesis_options/results.json diff --git a/evaluation_val/banking77/direct_to_which_department/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/banking77/direct_to_which_department/results.json similarity index 100% rename from evaluation_val/banking77/direct_to_which_department/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/banking77/direct_to_which_department/results.json diff --git a/evaluation_val/banking77/help_page_topic/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/banking77/help_page_topic/results.json similarity index 100% rename from evaluation_val/banking77/help_page_topic/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/banking77/help_page_topic/results.json diff --git a/evaluation_val/banking77/rephrase_as_banking_term/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/banking77/rephrase_as_banking_term/results.json similarity index 100% rename from evaluation_val/banking77/rephrase_as_banking_term/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/banking77/rephrase_as_banking_term/results.json diff --git a/evaluation_val/blbooksgenre/title_genre_classifiction/classify/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/classify/results.json similarity index 100% rename from evaluation_val/blbooksgenre/title_genre_classifiction/classify/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/classify/results.json diff --git a/evaluation_val/blbooksgenre/title_genre_classifiction/multi-choice/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/multi-choice/results.json similarity index 100% rename from evaluation_val/blbooksgenre/title_genre_classifiction/multi-choice/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/multi-choice/results.json diff --git a/evaluation_val/blbooksgenre/title_genre_classifiction/premise_context_first/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/premise_context_first/results.json similarity index 100% rename from evaluation_val/blbooksgenre/title_genre_classifiction/premise_context_first/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blbooksgenre/title_genre_classifiction/premise_context_first/results.json diff --git a/evaluation_val/blimp/adjunct_island/grammatical_between_1_2/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_between_1_2/results.json similarity index 100% rename from evaluation_val/blimp/adjunct_island/grammatical_between_1_2/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_between_1_2/results.json diff --git a/evaluation_val/blimp/adjunct_island/grammatical_between_A_B/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_between_A_B/results.json similarity index 100% rename from evaluation_val/blimp/adjunct_island/grammatical_between_A_B/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_between_A_B/results.json diff --git a/evaluation_val/blimp/adjunct_island/grammatical_which_one_1_2/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_which_one_1_2/results.json similarity index 100% rename from evaluation_val/blimp/adjunct_island/grammatical_which_one_1_2/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/grammatical_which_one_1_2/results.json diff --git a/evaluation_val/blimp/adjunct_island/single_sentence_bad_yes_no/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/single_sentence_bad_yes_no/results.json similarity index 100% rename from evaluation_val/blimp/adjunct_island/single_sentence_bad_yes_no/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/single_sentence_bad_yes_no/results.json diff --git a/evaluation_val/blimp/adjunct_island/single_sentence_good_yes_no/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/single_sentence_good_yes_no/results.json similarity index 100% rename from evaluation_val/blimp/adjunct_island/single_sentence_good_yes_no/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/blimp/adjunct_island/single_sentence_good_yes_no/results.json diff --git a/evaluation_val/climate_fever/claim_and_all_supporting_evidences/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/claim_and_all_supporting_evidences/results.json similarity index 100% rename from evaluation_val/climate_fever/claim_and_all_supporting_evidences/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/claim_and_all_supporting_evidences/results.json diff --git a/evaluation_val/climate_fever/fifth_evidence_and_claim_itemization/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/fifth_evidence_and_claim_itemization/results.json similarity index 100% rename from evaluation_val/climate_fever/fifth_evidence_and_claim_itemization/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/fifth_evidence_and_claim_itemization/results.json diff --git a/evaluation_val/climate_fever/first_evidence_and_claim_itemization/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/first_evidence_and_claim_itemization/results.json similarity index 100% rename from evaluation_val/climate_fever/first_evidence_and_claim_itemization/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/first_evidence_and_claim_itemization/results.json diff --git a/evaluation_val/climate_fever/second_evidence_and_claim_itemization/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/second_evidence_and_claim_itemization/results.json similarity index 100% rename from evaluation_val/climate_fever/second_evidence_and_claim_itemization/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/second_evidence_and_claim_itemization/results.json diff --git a/evaluation_val/climate_fever/third_evidence_claim_pair/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/third_evidence_claim_pair/results.json similarity index 100% rename from evaluation_val/climate_fever/third_evidence_claim_pair/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/climate_fever/third_evidence_claim_pair/results.json diff --git a/evaluation_val/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json similarity index 100% rename from evaluation_val/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json diff --git a/evaluation_val/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json similarity index 100% rename from evaluation_val/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json diff --git a/evaluation_val/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json similarity index 100% rename from evaluation_val/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json diff --git a/evaluation_val/commonsense_qa/answer_given_question_without_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/answer_given_question_without_options/results.json similarity index 100% rename from evaluation_val/commonsense_qa/answer_given_question_without_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/answer_given_question_without_options/results.json diff --git a/evaluation_val/commonsense_qa/most_suitable_answer/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/most_suitable_answer/results.json similarity index 100% rename from evaluation_val/commonsense_qa/most_suitable_answer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/most_suitable_answer/results.json diff --git a/evaluation_val/commonsense_qa/question_answering/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/question_answering/results.json similarity index 100% rename from evaluation_val/commonsense_qa/question_answering/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/commonsense_qa/question_answering/results.json diff --git a/evaluation_val/conv_ai_3/ambiguous/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/ambiguous/results.json similarity index 100% rename from evaluation_val/conv_ai_3/ambiguous/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/ambiguous/results.json diff --git a/evaluation_val/conv_ai_3/clarification_needed/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/clarification_needed/results.json similarity index 100% rename from evaluation_val/conv_ai_3/clarification_needed/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/clarification_needed/results.json diff --git a/evaluation_val/conv_ai_3/directly_answer/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/directly_answer/results.json similarity index 100% rename from evaluation_val/conv_ai_3/directly_answer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/directly_answer/results.json diff --git a/evaluation_val/conv_ai_3/score_give_number/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/score_give_number/results.json similarity index 100% rename from evaluation_val/conv_ai_3/score_give_number/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/score_give_number/results.json diff --git a/evaluation_val/conv_ai_3/score_how_much/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/score_how_much/results.json similarity index 100% rename from evaluation_val/conv_ai_3/score_how_much/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/conv_ai_3/score_how_much/results.json diff --git a/evaluation_val/craigslist_bargains/best_deal/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/best_deal/results.json similarity index 100% rename from evaluation_val/craigslist_bargains/best_deal/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/best_deal/results.json diff --git a/evaluation_val/craigslist_bargains/good_deal_for_seller/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller/results.json similarity index 100% rename from evaluation_val/craigslist_bargains/good_deal_for_seller/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller/results.json diff --git a/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price/results.json similarity index 100% rename from evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price/results.json diff --git a/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price_implicit/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price_implicit/results.json similarity index 100% rename from evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price_implicit/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/craigslist_bargains/good_deal_for_seller_no_list_price_implicit/results.json diff --git a/evaluation_val/emotion/answer_question_with_emotion_label/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/emotion/answer_question_with_emotion_label/results.json similarity index 100% rename from evaluation_val/emotion/answer_question_with_emotion_label/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/emotion/answer_question_with_emotion_label/results.json diff --git a/evaluation_val/emotion/answer_with_class_label/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/emotion/answer_with_class_label/results.json similarity index 100% rename from evaluation_val/emotion/answer_with_class_label/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/emotion/answer_with_class_label/results.json diff --git a/evaluation_val/emotion/choose_the_best_emotion_label/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/emotion/choose_the_best_emotion_label/results.json similarity index 100% rename from evaluation_val/emotion/choose_the_best_emotion_label/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/emotion/choose_the_best_emotion_label/results.json diff --git a/evaluation_val/emotion/reply_with_emoation_label/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/emotion/reply_with_emoation_label/results.json similarity index 100% rename from evaluation_val/emotion/reply_with_emoation_label/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/emotion/reply_with_emoation_label/results.json diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl similarity index 100% rename from evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl diff --git a/evaluation_val/financial_phrasebank/sentences_allagree/bullish_neutral_bearish/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/bullish_neutral_bearish/results.json similarity index 100% rename from evaluation_val/financial_phrasebank/sentences_allagree/bullish_neutral_bearish/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/bullish_neutral_bearish/results.json diff --git a/evaluation_val/financial_phrasebank/sentences_allagree/complementary_industries/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/complementary_industries/results.json similarity index 100% rename from evaluation_val/financial_phrasebank/sentences_allagree/complementary_industries/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/complementary_industries/results.json diff --git a/evaluation_val/financial_phrasebank/sentences_allagree/sentiment/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/sentiment/results.json similarity index 100% rename from evaluation_val/financial_phrasebank/sentences_allagree/sentiment/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/sentiment/results.json diff --git a/evaluation_val/financial_phrasebank/sentences_allagree/share_price_option/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/share_price_option/results.json similarity index 100% rename from evaluation_val/financial_phrasebank/sentences_allagree/share_price_option/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/share_price_option/results.json diff --git a/evaluation_val/financial_phrasebank/sentences_allagree/word_comes_to_mind/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/word_comes_to_mind/results.json similarity index 100% rename from evaluation_val/financial_phrasebank/sentences_allagree/word_comes_to_mind/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/financial_phrasebank/sentences_allagree/word_comes_to_mind/results.json diff --git a/evaluation_val/glue/cola/Following_sentence_acceptable/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Following_sentence_acceptable/results.json similarity index 100% rename from evaluation_val/glue/cola/Following_sentence_acceptable/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Following_sentence_acceptable/results.json diff --git a/evaluation_val/glue/cola/Make_sense_yes_no/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Make_sense_yes_no/results.json similarity index 100% rename from evaluation_val/glue/cola/Make_sense_yes_no/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Make_sense_yes_no/results.json diff --git a/evaluation_val/glue/cola/Previous_sentence_acceptable/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Previous_sentence_acceptable/results.json similarity index 100% rename from evaluation_val/glue/cola/Previous_sentence_acceptable/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/Previous_sentence_acceptable/results.json diff --git a/evaluation_val/glue/cola/editing/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/editing/results.json similarity index 100% rename from evaluation_val/glue/cola/editing/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/editing/results.json diff --git a/evaluation_val/glue/cola/is_this_correct/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/is_this_correct/results.json similarity index 100% rename from evaluation_val/glue/cola/is_this_correct/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/cola/is_this_correct/results.json diff --git a/evaluation_val/glue/sst2/following_positive_negative/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/following_positive_negative/results.json similarity index 100% rename from evaluation_val/glue/sst2/following_positive_negative/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/following_positive_negative/results.json diff --git a/evaluation_val/glue/sst2/happy_or_mad/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/happy_or_mad/results.json similarity index 100% rename from evaluation_val/glue/sst2/happy_or_mad/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/happy_or_mad/results.json diff --git a/evaluation_val/glue/sst2/positive_negative_after/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/positive_negative_after/results.json similarity index 100% rename from evaluation_val/glue/sst2/positive_negative_after/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/positive_negative_after/results.json diff --git a/evaluation_val/glue/sst2/review/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/review/results.json similarity index 100% rename from evaluation_val/glue/sst2/review/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/review/results.json diff --git a/evaluation_val/glue/sst2/said/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/said/results.json similarity index 100% rename from evaluation_val/glue/sst2/said/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/glue/sst2/said/results.json diff --git a/evaluation_val/head_qa/en/multiple_choice_a_and_q_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_a_and_q_en/results.json similarity index 100% rename from evaluation_val/head_qa/en/multiple_choice_a_and_q_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_a_and_q_en/results.json diff --git a/evaluation_val/head_qa/en/multiple_choice_a_and_q_with_context_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_a_and_q_with_context_en/results.json similarity index 100% rename from evaluation_val/head_qa/en/multiple_choice_a_and_q_with_context_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_a_and_q_with_context_en/results.json diff --git a/evaluation_val/head_qa/en/multiple_choice_q_and_a_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_en/results.json similarity index 100% rename from evaluation_val/head_qa/en/multiple_choice_q_and_a_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_en/results.json diff --git a/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_en/results.json similarity index 100% rename from evaluation_val/head_qa/en/multiple_choice_q_and_a_index_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_en/results.json diff --git a/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_with_context_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_with_context_en/results.json similarity index 100% rename from evaluation_val/head_qa/en/multiple_choice_q_and_a_index_with_context_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/en/multiple_choice_q_and_a_index_with_context_en/results.json diff --git a/evaluation_val/head_qa/es/multiple_choice_a_and_q_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_a_and_q_en/results.json similarity index 100% rename from evaluation_val/head_qa/es/multiple_choice_a_and_q_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_a_and_q_en/results.json diff --git a/evaluation_val/head_qa/es/multiple_choice_a_and_q_with_context_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_a_and_q_with_context_en/results.json similarity index 100% rename from evaluation_val/head_qa/es/multiple_choice_a_and_q_with_context_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_a_and_q_with_context_en/results.json diff --git a/evaluation_val/head_qa/es/multiple_choice_q_and_a_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_en/results.json similarity index 100% rename from evaluation_val/head_qa/es/multiple_choice_q_and_a_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_en/results.json diff --git a/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_en/results.json similarity index 100% rename from evaluation_val/head_qa/es/multiple_choice_q_and_a_index_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_en/results.json diff --git a/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_with_context_en/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_with_context_en/results.json similarity index 100% rename from evaluation_val/head_qa/es/multiple_choice_q_and_a_index_with_context_en/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/head_qa/es/multiple_choice_q_and_a_index_with_context_en/results.json diff --git a/evaluation_val/health_fact/claim_explanation_classification/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_explanation_classification/results.json similarity index 100% rename from evaluation_val/health_fact/claim_explanation_classification/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_explanation_classification/results.json diff --git a/evaluation_val/health_fact/claim_veracity_classification_after_reading_I_believe/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_veracity_classification_after_reading_I_believe/results.json similarity index 100% rename from evaluation_val/health_fact/claim_veracity_classification_after_reading_I_believe/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_veracity_classification_after_reading_I_believe/results.json diff --git a/evaluation_val/health_fact/claim_veracity_classification_tell_me/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_veracity_classification_tell_me/results.json similarity index 100% rename from evaluation_val/health_fact/claim_veracity_classification_tell_me/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/health_fact/claim_veracity_classification_tell_me/results.json diff --git a/evaluation_val/hlgd/is_same_event_editor_asks/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_editor_asks/results.json similarity index 100% rename from evaluation_val/hlgd/is_same_event_editor_asks/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_editor_asks/results.json diff --git a/evaluation_val/hlgd/is_same_event_interrogative_talk/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_interrogative_talk/results.json similarity index 100% rename from evaluation_val/hlgd/is_same_event_interrogative_talk/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_interrogative_talk/results.json diff --git a/evaluation_val/hlgd/is_same_event_refer/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_refer/results.json similarity index 100% rename from evaluation_val/hlgd/is_same_event_refer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_refer/results.json diff --git a/evaluation_val/hlgd/is_same_event_with_time_interrogative_related/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_with_time_interrogative_related/results.json similarity index 100% rename from evaluation_val/hlgd/is_same_event_with_time_interrogative_related/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_with_time_interrogative_related/results.json diff --git a/evaluation_val/hlgd/is_same_event_with_time_interrogative_talk/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_with_time_interrogative_talk/results.json similarity index 100% rename from evaluation_val/hlgd/is_same_event_with_time_interrogative_talk/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hlgd/is_same_event_with_time_interrogative_talk/results.json diff --git a/evaluation_val/hyperpartisan_news_detection/byarticle/consider_does_it_follow_a_hyperpartisan_argumentation/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consider_does_it_follow_a_hyperpartisan_argumentation/results.json similarity index 100% rename from evaluation_val/hyperpartisan_news_detection/byarticle/consider_does_it_follow_a_hyperpartisan_argumentation/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consider_does_it_follow_a_hyperpartisan_argumentation/results.json diff --git a/evaluation_val/hyperpartisan_news_detection/byarticle/consider_it_exhibits_extreme_one_sidedness/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consider_it_exhibits_extreme_one_sidedness/results.json similarity index 100% rename from evaluation_val/hyperpartisan_news_detection/byarticle/consider_it_exhibits_extreme_one_sidedness/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consider_it_exhibits_extreme_one_sidedness/results.json diff --git a/evaluation_val/hyperpartisan_news_detection/byarticle/consume_with_caution/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consume_with_caution/results.json similarity index 100% rename from evaluation_val/hyperpartisan_news_detection/byarticle/consume_with_caution/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/consume_with_caution/results.json diff --git a/evaluation_val/hyperpartisan_news_detection/byarticle/extreme_left_wing_or_right_wing/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/extreme_left_wing_or_right_wing/results.json similarity index 100% rename from evaluation_val/hyperpartisan_news_detection/byarticle/extreme_left_wing_or_right_wing/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/extreme_left_wing_or_right_wing/results.json diff --git a/evaluation_val/hyperpartisan_news_detection/byarticle/follows_hyperpartisan_argumentation/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/follows_hyperpartisan_argumentation/results.json similarity index 100% rename from evaluation_val/hyperpartisan_news_detection/byarticle/follows_hyperpartisan_argumentation/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/hyperpartisan_news_detection/byarticle/follows_hyperpartisan_argumentation/results.json diff --git a/evaluation_val/liar/Given_statement_guess_category/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/liar/Given_statement_guess_category/results.json similarity index 100% rename from evaluation_val/liar/Given_statement_guess_category/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/liar/Given_statement_guess_category/results.json diff --git a/evaluation_val/lince/sa_spaeng/express_sentiment/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/express_sentiment/results.json similarity index 100% rename from evaluation_val/lince/sa_spaeng/express_sentiment/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/express_sentiment/results.json diff --git a/evaluation_val/lince/sa_spaeng/negation_template/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/negation_template/results.json similarity index 100% rename from evaluation_val/lince/sa_spaeng/negation_template/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/negation_template/results.json diff --git a/evaluation_val/lince/sa_spaeng/original_poster_expressed_sentiment/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/original_poster_expressed_sentiment/results.json similarity index 100% rename from evaluation_val/lince/sa_spaeng/original_poster_expressed_sentiment/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/original_poster_expressed_sentiment/results.json diff --git a/evaluation_val/lince/sa_spaeng/sentiment_trying_to_express/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/sentiment_trying_to_express/results.json similarity index 100% rename from evaluation_val/lince/sa_spaeng/sentiment_trying_to_express/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/sentiment_trying_to_express/results.json diff --git a/evaluation_val/lince/sa_spaeng/the_author_seem/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/the_author_seem/results.json similarity index 100% rename from evaluation_val/lince/sa_spaeng/the_author_seem/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/lince/sa_spaeng/the_author_seem/results.json diff --git a/evaluation_val/math_qa/choose_correct_og/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/math_qa/choose_correct_og/results.json similarity index 100% rename from evaluation_val/math_qa/choose_correct_og/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/math_qa/choose_correct_og/results.json diff --git a/evaluation_val/math_qa/first_choice_then_problem/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/math_qa/first_choice_then_problem/results.json similarity index 100% rename from evaluation_val/math_qa/first_choice_then_problem/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/math_qa/first_choice_then_problem/results.json diff --git a/evaluation_val/math_qa/gre_problem/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/math_qa/gre_problem/results.json similarity index 100% rename from evaluation_val/math_qa/gre_problem/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/math_qa/gre_problem/results.json diff --git a/evaluation_val/math_qa/pick_the_correct/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/math_qa/pick_the_correct/results.json similarity index 100% rename from evaluation_val/math_qa/pick_the_correct/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/math_qa/pick_the_correct/results.json diff --git a/evaluation_val/math_qa/problem_set_type/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/math_qa/problem_set_type/results.json similarity index 100% rename from evaluation_val/math_qa/problem_set_type/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/math_qa/problem_set_type/results.json diff --git a/evaluation_val/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_val/merged.csv similarity index 100% rename from evaluation_val/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_val/merged.csv diff --git a/evaluation_val/merged.json b/evaluation_bloomz-7b1-p3/evaluation_val/merged.json similarity index 100% rename from evaluation_val/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_val/merged.json diff --git a/evaluation_val/movie_rationales/Evidences_+_review/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Evidences_+_review/results.json similarity index 100% rename from evaluation_val/movie_rationales/Evidences_+_review/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Evidences_+_review/results.json diff --git a/evaluation_val/movie_rationales/Evidences_sentiment_classification/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Evidences_sentiment_classification/results.json similarity index 100% rename from evaluation_val/movie_rationales/Evidences_sentiment_classification/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Evidences_sentiment_classification/results.json diff --git a/evaluation_val/movie_rationales/Standard_binary_sentiment_analysis/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Standard_binary_sentiment_analysis/results.json similarity index 100% rename from evaluation_val/movie_rationales/Standard_binary_sentiment_analysis/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/movie_rationales/Standard_binary_sentiment_analysis/results.json diff --git a/evaluation_val/mwsc/in-the-sentence-question-first/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/mwsc/in-the-sentence-question-first/results.json similarity index 100% rename from evaluation_val/mwsc/in-the-sentence-question-first/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/mwsc/in-the-sentence-question-first/results.json diff --git a/evaluation_val/mwsc/in-the-sentence/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/mwsc/in-the-sentence/results.json similarity index 100% rename from evaluation_val/mwsc/in-the-sentence/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/mwsc/in-the-sentence/results.json diff --git a/evaluation_val/mwsc/is-correct/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/mwsc/is-correct/results.json similarity index 100% rename from evaluation_val/mwsc/is-correct/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/mwsc/is-correct/results.json diff --git a/evaluation_val/mwsc/options-or/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/mwsc/options-or/results.json similarity index 100% rename from evaluation_val/mwsc/options-or/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/mwsc/options-or/results.json diff --git a/evaluation_val/mwsc/what-think/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/mwsc/what-think/results.json similarity index 100% rename from evaluation_val/mwsc/what-think/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/mwsc/what-think/results.json diff --git a/evaluation_val/onestop_english/ara_context/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/ara_context/results.json similarity index 100% rename from evaluation_val/onestop_english/ara_context/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/ara_context/results.json diff --git a/evaluation_val/onestop_english/assess/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/assess/results.json similarity index 100% rename from evaluation_val/onestop_english/assess/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/assess/results.json diff --git a/evaluation_val/onestop_english/determine_reading_level_from_the_first_three_sentences/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/determine_reading_level_from_the_first_three_sentences/results.json similarity index 100% rename from evaluation_val/onestop_english/determine_reading_level_from_the_first_three_sentences/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/determine_reading_level_from_the_first_three_sentences/results.json diff --git a/evaluation_val/onestop_english/esl_context/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/esl_context/results.json similarity index 100% rename from evaluation_val/onestop_english/esl_context/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/esl_context/results.json diff --git a/evaluation_val/onestop_english/esl_variation/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/esl_variation/results.json similarity index 100% rename from evaluation_val/onestop_english/esl_variation/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/onestop_english/esl_variation/results.json diff --git a/evaluation_val/poem_sentiment/guess_sentiment_without_options_variation_1/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/guess_sentiment_without_options_variation_1/results.json similarity index 100% rename from evaluation_val/poem_sentiment/guess_sentiment_without_options_variation_1/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/guess_sentiment_without_options_variation_1/results.json diff --git a/evaluation_val/poem_sentiment/most_appropriate_sentiment/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/most_appropriate_sentiment/results.json similarity index 100% rename from evaluation_val/poem_sentiment/most_appropriate_sentiment/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/most_appropriate_sentiment/results.json diff --git a/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_1/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_1/results.json similarity index 100% rename from evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_1/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_1/results.json diff --git a/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_2/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_2/results.json similarity index 100% rename from evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_2/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/positive_or_negative_sentiment_variation_2/results.json diff --git a/evaluation_val/poem_sentiment/question_answer_format/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/question_answer_format/results.json similarity index 100% rename from evaluation_val/poem_sentiment/question_answer_format/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/poem_sentiment/question_answer_format/results.json diff --git a/evaluation_val/pubmed_qa/pqa_labeled/Long_Answer_to_Final_Decision/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/pubmed_qa/pqa_labeled/Long_Answer_to_Final_Decision/results.json similarity index 100% rename from evaluation_val/pubmed_qa/pqa_labeled/Long_Answer_to_Final_Decision/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/pubmed_qa/pqa_labeled/Long_Answer_to_Final_Decision/results.json diff --git a/evaluation_val/pubmed_qa/pqa_labeled/Question_Answering_(Short)/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/pubmed_qa/pqa_labeled/Question_Answering_(Short)/results.json similarity index 100% rename from evaluation_val/pubmed_qa/pqa_labeled/Question_Answering_(Short)/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/pubmed_qa/pqa_labeled/Question_Answering_(Short)/results.json diff --git a/evaluation_val/riddle_sense/answer_given_question_without_options/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/answer_given_question_without_options/results.json similarity index 100% rename from evaluation_val/riddle_sense/answer_given_question_without_options/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/answer_given_question_without_options/results.json diff --git a/evaluation_val/riddle_sense/most_suitable_answer/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/most_suitable_answer/results.json similarity index 100% rename from evaluation_val/riddle_sense/most_suitable_answer/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/most_suitable_answer/results.json diff --git a/evaluation_val/riddle_sense/question_answering/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/question_answering/results.json similarity index 100% rename from evaluation_val/riddle_sense/question_answering/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/question_answering/results.json diff --git a/evaluation_val/riddle_sense/question_to_answer_index/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/question_to_answer_index/results.json similarity index 100% rename from evaluation_val/riddle_sense/question_to_answer_index/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/riddle_sense/question_to_answer_index/results.json diff --git a/evaluation_val/scicite/Classify_intent/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent/results.json similarity index 100% rename from evaluation_val/scicite/Classify_intent/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent/results.json diff --git a/evaluation_val/scicite/Classify_intent_(choices_first)/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_(choices_first)/results.json similarity index 100% rename from evaluation_val/scicite/Classify_intent_(choices_first)/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_(choices_first)/results.json diff --git a/evaluation_val/scicite/Classify_intent_(select_choice)/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_(select_choice)/results.json similarity index 100% rename from evaluation_val/scicite/Classify_intent_(select_choice)/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_(select_choice)/results.json diff --git a/evaluation_val/scicite/Classify_intent_w_section_(select_choice)/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_w_section_(select_choice)/results.json similarity index 100% rename from evaluation_val/scicite/Classify_intent_w_section_(select_choice)/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/scicite/Classify_intent_w_section_(select_choice)/results.json diff --git a/evaluation_val/scicite/can_describe/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/scicite/can_describe/results.json similarity index 100% rename from evaluation_val/scicite/can_describe/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/scicite/can_describe/results.json diff --git a/evaluation_val/selqa/answer_selection_analysis/is-he-talking-about/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/is-he-talking-about/results.json similarity index 100% rename from evaluation_val/selqa/answer_selection_analysis/is-he-talking-about/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/is-he-talking-about/results.json diff --git a/evaluation_val/selqa/answer_selection_analysis/make-sense-rand/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/make-sense-rand/results.json similarity index 100% rename from evaluation_val/selqa/answer_selection_analysis/make-sense-rand/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/make-sense-rand/results.json diff --git a/evaluation_val/selqa/answer_selection_analysis/which-answer-1st-vs-random/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/which-answer-1st-vs-random/results.json similarity index 100% rename from evaluation_val/selqa/answer_selection_analysis/which-answer-1st-vs-random/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/which-answer-1st-vs-random/results.json diff --git a/evaluation_val/selqa/answer_selection_analysis/would-make-sense-qu-rand/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/would-make-sense-qu-rand/results.json similarity index 100% rename from evaluation_val/selqa/answer_selection_analysis/would-make-sense-qu-rand/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/selqa/answer_selection_analysis/would-make-sense-qu-rand/results.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json b/evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json similarity index 100% rename from evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json rename to evaluation_bloomz-7b1-p3/evaluation_val/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.json diff --git a/evaluation_val/snips_built_in_intents/categorize_query/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/categorize_query/results.json similarity index 100% rename from evaluation_val/snips_built_in_intents/categorize_query/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/categorize_query/results.json diff --git a/evaluation_val/snips_built_in_intents/categorize_query_brief/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/categorize_query_brief/results.json similarity index 100% rename from evaluation_val/snips_built_in_intents/categorize_query_brief/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/categorize_query_brief/results.json diff --git a/evaluation_val/snips_built_in_intents/intent_query/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/intent_query/results.json similarity index 100% rename from evaluation_val/snips_built_in_intents/intent_query/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/intent_query/results.json diff --git a/evaluation_val/snips_built_in_intents/query_intent/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/query_intent/results.json similarity index 100% rename from evaluation_val/snips_built_in_intents/query_intent/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/query_intent/results.json diff --git a/evaluation_val/snips_built_in_intents/voice_intent/results.json b/evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/voice_intent/results.json similarity index 100% rename from evaluation_val/snips_built_in_intents/voice_intent/results.json rename to evaluation_bloomz-7b1-p3/evaluation_val/snips_built_in_intents/voice_intent/results.json diff --git a/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json similarity index 100% rename from evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json diff --git a/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json similarity index 100% rename from evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/agg.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json diff --git a/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl similarity index 100% rename from evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl diff --git a/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl similarity index 100% rename from evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl diff --git a/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json similarity index 100% rename from evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json diff --git a/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json b/evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json similarity index 100% rename from evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json rename to evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/slim.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.json diff --git a/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.csv similarity index 100% rename from evaluation_xnlihtmt/xnliht/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.csv diff --git a/evaluation_xnlihtmt/xnliht/merged.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.json diff --git a/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/merged.csv similarity index 100% rename from evaluation_xnlihtmt/xnlimt/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/merged.csv diff --git a/evaluation_xnlihtmt/xnlimt/merged.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/merged.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/merged.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xwinograd/zh/Replace_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xwinograd/zh/Replace_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json diff --git a/evaluation_copawinostoryht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json diff --git a/evaluation_copawinostoryht/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/merged.csv similarity index 100% rename from evaluation_copawinostoryht/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/merged.csv diff --git a/evaluation_copawinostoryht/merged.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/merged.json similarity index 100% rename from evaluation_copawinostoryht/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/merged.json diff --git a/evaluation_copawinostoryht/xcopa/zh/C1_or_C2?_premise_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/xcopa/zh/C1_or_C2?_premise_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json diff --git a/evaluation_copawinostoryht/xcopa/zh/best_option_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/xcopa/zh/best_option_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json diff --git a/evaluation_copawinostoryht/xcopa/zh/cause_effect_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/xcopa/zh/cause_effect_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json diff --git a/evaluation_copawinostoryht/xcopa/zh/i_am_hesitating_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/xcopa/zh/i_am_hesitating_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json diff --git a/evaluation_copawinostoryht/xcopa/zh/plausible_alternatives_zhht/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json similarity index 100% rename from evaluation_copawinostoryht/xcopa/zh/plausible_alternatives_zhht/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json diff --git a/evaluation_copawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json diff --git a/evaluation_copawinostorymt/merged.csv b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/merged.csv similarity index 100% rename from evaluation_copawinostorymt/merged.csv rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/merged.csv diff --git a/evaluation_copawinostorymt/merged.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/merged.json similarity index 100% rename from evaluation_copawinostorymt/merged.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/merged.json diff --git a/evaluation_copawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/id/best_option_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/id/best_option_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/id/cause_effect_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/id/cause_effect_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/sw/best_option_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/sw/best_option_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/sw/cause_effect_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json diff --git a/evaluation_copawinostorymt/xcopa/ta/best_option_tamt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/ta/best_option_tamt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json diff --git a/evaluation_copawinostorymt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/ta/cause_effect_tamt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json diff --git a/evaluation_copawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json diff --git a/evaluation_copawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json diff --git a/evaluation_copawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json diff --git a/evaluation_copawinostorymt/xcopa/vi/best_option_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/vi/best_option_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json diff --git a/evaluation_copawinostorymt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/vi/cause_effect_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json diff --git a/evaluation_copawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json diff --git a/evaluation_copawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json diff --git a/evaluation_copawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/zh/best_option_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/zh/best_option_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/zh/cause_effect_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json diff --git a/evaluation_copawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json similarity index 100% rename from evaluation_copawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json rename to evaluation_bloomz-7b1-p3/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json