diff --git a/.gitattributes b/.gitattributes
index 1fd4826e6f1aafb9303f7a6f9709083bd5723fc3..af3ee52691bfc0434d58a4fd62e65013132f98a8 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -31,3 +31,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+logs/logs/main_log.txt filter=lfs diff=lfs merge=lfs -text
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json
diff --git a/evaluation_l1/anli/dev_r1/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r1/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json
diff --git a/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/anli/dev_r1/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r1/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json
diff --git a/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/anli/dev_r1/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r1/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json
diff --git a/evaluation_l1/anli/dev_r2/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r2/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json
diff --git a/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/anli/dev_r2/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r2/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json
diff --git a/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/anli/dev_r2/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r2/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json
diff --git a/evaluation_l1/anli/dev_r3/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r3/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json
diff --git a/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/anli/dev_r3/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r3/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json
diff --git a/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/anli/dev_r3/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/anli/dev_r3/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json
diff --git a/evaluation_l1/merged.csv b/evaluation_bloomz-7b1/evaluation_l1/merged.csv
similarity index 100%
rename from evaluation_l1/merged.csv
rename to evaluation_bloomz-7b1/evaluation_l1/merged.csv
diff --git a/evaluation_l1/merged.json b/evaluation_bloomz-7b1/evaluation_l1/merged.json
similarity index 100%
rename from evaluation_l1/merged.json
rename to evaluation_bloomz-7b1/evaluation_l1/merged.json
diff --git a/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json
similarity index 100%
rename from evaluation_l1/story_cloze/2016/Answer_Given_options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json
diff --git a/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json
similarity index 100%
rename from evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json
diff --git a/evaluation_l1/story_cloze/2016/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json
similarity index 100%
rename from evaluation_l1/story_cloze/2016/Generate_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json
diff --git a/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json
similarity index 100%
rename from evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json
diff --git a/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json
similarity index 100%
rename from evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json
diff --git a/evaluation_l1/super_glue/cb/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/super_glue/cb/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json
diff --git a/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/super_glue/cb/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/super_glue/cb/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json
diff --git a/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/super_glue/cb/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/super_glue/cb/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json
diff --git a/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/super_glue/copa/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/best_option/results.json
similarity index 100%
rename from evaluation_l1/super_glue/copa/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/best_option/results.json
diff --git a/evaluation_l1/super_glue/copa/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/super_glue/copa/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json
diff --git a/evaluation_l1/super_glue/copa/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/super_glue/copa/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json
diff --git a/evaluation_l1/super_glue/copa/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/super_glue/copa/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json
diff --git a/evaluation_l1/super_glue/rte/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/super_glue/rte/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json
diff --git a/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/super_glue/rte/does_it_follow_that/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json
similarity index 100%
rename from evaluation_l1/super_glue/rte/does_it_follow_that/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json
diff --git a/evaluation_l1/super_glue/rte/guaranteed_true/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json
similarity index 100%
rename from evaluation_l1/super_glue/rte/guaranteed_true/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json
diff --git a/evaluation_l1/super_glue/rte/should_assume/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json
similarity index 100%
rename from evaluation_l1/super_glue/rte/should_assume/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json
diff --git a/evaluation_l1/winogrande/winogrande_xl/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json
similarity index 100%
rename from evaluation_l1/winogrande/winogrande_xl/Replace/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json
diff --git a/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json
similarity index 100%
rename from evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json
diff --git a/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json
diff --git a/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json
similarity index 100%
rename from evaluation_l1/winogrande/winogrande_xl/stand_for/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json
diff --git a/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json
similarity index 100%
rename from evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json
diff --git a/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/xcopa/id/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/best_option/results.json
similarity index 100%
rename from evaluation_l1/xcopa/id/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/best_option/results.json
diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/xcopa/id/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json
diff --git a/evaluation_l1/xcopa/id/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/xcopa/id/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json
diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/xcopa/id/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json
diff --git a/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/xcopa/sw/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/best_option/results.json
similarity index 100%
rename from evaluation_l1/xcopa/sw/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/best_option/results.json
diff --git a/evaluation_l1/xcopa/sw/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/xcopa/sw/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json
diff --git a/evaluation_l1/xcopa/sw/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/xcopa/sw/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json
diff --git a/evaluation_l1/xcopa/sw/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/xcopa/sw/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json
diff --git a/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/xcopa/ta/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/best_option/results.json
similarity index 100%
rename from evaluation_l1/xcopa/ta/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/best_option/results.json
diff --git a/evaluation_l1/xcopa/ta/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/xcopa/ta/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json
diff --git a/evaluation_l1/xcopa/ta/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/xcopa/ta/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json
diff --git a/evaluation_l1/xcopa/ta/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/xcopa/ta/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json
diff --git a/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/xcopa/vi/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/best_option/results.json
similarity index 100%
rename from evaluation_l1/xcopa/vi/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/best_option/results.json
diff --git a/evaluation_l1/xcopa/vi/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/xcopa/vi/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json
diff --git a/evaluation_l1/xcopa/vi/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/xcopa/vi/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json
diff --git a/evaluation_l1/xcopa/vi/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/xcopa/vi/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json
diff --git a/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json
similarity index 100%
rename from evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json
diff --git a/evaluation_l1/xcopa/zh/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/best_option/results.json
similarity index 100%
rename from evaluation_l1/xcopa/zh/best_option/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/best_option/results.json
diff --git a/evaluation_l1/xcopa/zh/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json
similarity index 100%
rename from evaluation_l1/xcopa/zh/cause_effect/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json
diff --git a/evaluation_l1/xcopa/zh/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json
similarity index 100%
rename from evaluation_l1/xcopa/zh/i_am_hesitating/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json
diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json
similarity index 100%
rename from evaluation_l1/xcopa/zh/plausible_alternatives/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json
diff --git a/evaluation_l1/xnli/ar/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/ar/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/ar/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/ar/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/ar/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/ar/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/ar/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/en/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/en/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/en/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/en/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/en/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/en/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/en/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/es/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/es/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/es/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/es/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/es/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/fr/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/fr/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/fr/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/fr/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/fr/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/fr/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/fr/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/hi/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/hi/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/hi/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/hi/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/hi/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/hi/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/hi/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/sw/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/sw/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/sw/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/sw/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/sw/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/sw/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/sw/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/ur/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/ur/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/ur/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/ur/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/ur/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/ur/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/ur/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/vi/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/vi/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/vi/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/vi/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/vi/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/vi/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/vi/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json
diff --git a/evaluation_l1/xnli/zh/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json
similarity index 100%
rename from evaluation_l1/xnli/zh/GPT-3_style/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json
diff --git a/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json
similarity index 100%
rename from evaluation_l1/xnli/zh/MNLI_crowdsource/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json
diff --git a/evaluation_l1/xnli/zh/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json
similarity index 100%
rename from evaluation_l1/xnli/zh/can_we_infer/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json
diff --git a/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json
similarity index 100%
rename from evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json
diff --git a/evaluation_l1/xnli/zh/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json
similarity index 100%
rename from evaluation_l1/xnli/zh/justified_in_saying/results.json
rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..270db71333e2302d2d735a59743ac24b7bf0bb67
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "my",
+  "template_name": "Answer Given options",
+  "evaluation": {
+    "accuracy": 0.5043017868960953
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f58482a06656f3d83bdd1459a3e4b5fa534b041
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "my",
+  "template_name": "Choose Story Ending",
+  "evaluation": {
+    "accuracy": 0.49702183984116477
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b44fdf5bbcc99e50426b50ee3fdf4359d927e8ae
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "my",
+  "template_name": "Generate Ending",
+  "evaluation": {
+    "accuracy": 0.48510919920582396
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a2245816d77b3cb1cc292336a33f537543d0a39e
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "my",
+  "template_name": "Novel Correct Ending",
+  "evaluation": {
+    "accuracy": 0.499669093315685
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8dfc2f00973ecf0e2ba3138bf5c5689bc4c55b59
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "my",
+  "template_name": "Story Continuation and Options",
+  "evaluation": {
+    "accuracy": 0.49106551952349436
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..9feba995957604f280ccc4aa41222411d06a7949
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ru",
+  "template_name": "Answer Given options",
+  "evaluation": {
+    "accuracy": 0.514228987425546
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7f62178f612a0fa09b5fc45e1de0379f5caad4f
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ru",
+  "template_name": "Choose Story Ending",
+  "evaluation": {
+    "accuracy": 0.6532097948378557
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e3623ec577d930051ad134ee13e1a3d36146ec91
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ru",
+  "template_name": "Generate Ending",
+  "evaluation": {
+    "accuracy": 0.5043017868960953
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c71e4a178f58adabfac3da0f00052119f228c13
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ru",
+  "template_name": "Novel Correct Ending",
+  "evaluation": {
+    "accuracy": 0.6326935804103243
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d24e5161eaefd2bce1c1c64bb4193160876ac14
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ru",
+  "template_name": "Story Continuation and Options",
+  "evaluation": {
+    "accuracy": 0.6412971542025149
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d3847738cbb57e5d6214e935f4b98fe598332f8
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "jp",
+  "template_name": "Replace",
+  "evaluation": {
+    "accuracy": 0.5286757038581856
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..25978d08065d05b4ed3c08d0e362247fa414df42
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "jp",
+  "template_name": "True or False",
+  "evaluation": {
+    "accuracy": 0.4807090719499479
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b6ee43811f0557fc60576c00e5f0102627f1e918
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "jp",
+  "template_name": "does underscore refer to",
+  "evaluation": {
+    "accuracy": 0.5151199165797706
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6df08d54b4d45b4d8e65d4ae70690096f1003a9
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "jp",
+  "template_name": "stand for",
+  "evaluation": {
+    "accuracy": 0.49009384775808135
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..054947deee87d0f73c39c67ddf23a0f7c107eb2b
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "jp",
+  "template_name": "underscore refer to",
+  "evaluation": {
+    "accuracy": 0.5067778936392076
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c622a2f4b0805469dabb3e3d35cbcd2b1c390da
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "et",
+  "template_name": "C1 or C2? premise, so/because\u2026",
+  "evaluation": {
+    "accuracy": 0.5
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..628d54971a15c0589eced916cacfc63d8904303b
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "et",
+  "template_name": "best_option",
+  "evaluation": {
+    "accuracy": 0.49
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4767de08c02cac8c4af52288d33676cc0ba92927
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "et",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.49
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5b37393b08c237ede5f8fb34f994eabe7b0dc98f
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "et",
+  "template_name": "i_am_hesitating",
+  "evaluation": {
+    "accuracy": 0.49
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..59881679f8ab60081cbebc4110761b4ee21b1c7b
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "et",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.48
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..28aa7bf010c751795f42cea0423165b411a62934
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "ht",
+  "template_name": "C1 or C2? premise, so/because\u2026",
+  "evaluation": {
+    "accuracy": 0.54
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f37f1e77c7a139882c82c798432c717bf007c3c8
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "ht",
+  "template_name": "best_option",
+  "evaluation": {
+    "accuracy": 0.47
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b2b16e769f653357015fd6405fc4d78368ba7777
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "ht",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.49
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbc2890be9b75fadd61b053ed1c4c8ee8e3cf92d
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "ht",
+  "template_name": "i_am_hesitating",
+  "evaluation": {
+    "accuracy": 0.47
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4961c838779c5cc7b8ea1fd9c0a841853ace6bed
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "ht",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.5
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdf7b9f5c80b6f3d693186fe0bccb2a90bf5b78c
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "it",
+  "template_name": "C1 or C2? premise, so/because\u2026",
+  "evaluation": {
+    "accuracy": 0.55
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c9449aadd07b1e44a8e2aa315cbc9a0092d8d79
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "it",
+  "template_name": "best_option",
+  "evaluation": {
+    "accuracy": 0.52
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7529bd943f58bc39dc5388e96f81cf4506d2e8b2
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "it",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.59
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf923da32703426e7b6cbe6724e252ab77426c7e
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "it",
+  "template_name": "i_am_hesitating",
+  "evaluation": {
+    "accuracy": 0.61
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..37c658578827e3dec984106a4b7d592c5565b2d8
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "it",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.59
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7939dea9d5d9010c8e472eceed05f2c6def20be7
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "qu",
+  "template_name": "C1 or C2? premise, so/because\u2026",
+  "evaluation": {
+    "accuracy": 0.6
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a6ae216d40cf90c7946bc4652dfdb50e7fb25e0c
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "qu",
+  "template_name": "best_option",
+  "evaluation": {
+    "accuracy": 0.45
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..07ada51e14a6f36af0f698a0fa0f68e0c2f0d49a
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "qu",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.45
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..16536437bfa2633c6118ce692549b74e1ccbec97
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "qu",
+  "template_name": "i_am_hesitating",
+  "evaluation": {
+    "accuracy": 0.46
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2b13d3c1de6b6d106a37345220e26a02a81e07b5
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "qu",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.43
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..61e9d07c4585209995a3740c2e4411cbf964829c
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "tr",
+  "template_name": "C1 or C2? premise, so/because\u2026",
+  "evaluation": {
+    "accuracy": 0.49
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..88fdb6bd97a1811c3ec72cbc759b9022be167b8c
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "tr",
+  "template_name": "best_option",
+  "evaluation": {
+    "accuracy": 0.44
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..194f239c44f068855140863abde58b9b39a71df4
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "tr",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.47
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ec9beafb9eca360eef44c090721da5d3ef6f8fe3
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "tr",
+  "template_name": "i_am_hesitating",
+  "evaluation": {
+    "accuracy": 0.5
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..792f5db5c4aa7a29ae1f23d5690c9e744767f800
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "tr",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.53
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6c364261b08713f364dfbdfae038001e665fba98
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "bg",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.4353413654618474
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..71f52dc7f359a415e72bc9520a35e5c3aca5057d
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "bg",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3546184738955823
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d8e7dad665b37fc6643a0782625ea27d24410d0
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "bg",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.43614457831325304
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c39711da879e496c7e7b2f1e82494c0d9d963413
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "bg",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3453815261044177
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8baee2830fb3fbe8a2ac7c15776d0dd01ebaa3db
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "bg",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.43493975903614457
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..61f964663cde8f3765b43f2a2fd0a9e16957d655
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "de",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.4682730923694779
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..e7408e55827294d04d99b5f27324217c9dbcfba5
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "de",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.36626506024096384
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..45350eb47b641f4ff2a60423408e6ff529a287a8
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "de",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.45863453815261046
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d92dbaca1632e6f7a8658665ee0bf0152e990e6
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "de",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3746987951807229
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..193b9b43b70acf89516c4cff41282cf25b24c966
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "de",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.4630522088353414
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0bfc8fd857e78ed120a40ad673548881c4fd3efe
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "el",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.41244979919678715
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..5dd399e2bd8c1f7fdb043cf4cc4c03071bc7a5ba
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "el",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3538152610441767
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2064b26b4f027795986c63bf990dd5458644caa0
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "el",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.4108433734939759
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1961705ffb5458181047a2d594bcc4f7bcb2d9d
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "el",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.342570281124498
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..843075f08ced825f87faf31a07c4c58fb35f6c53
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "el",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.41526104417670684
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ecc109b75b2288c8f6a3b854995ebe40f1ddf20c
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ru",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.4775100401606426
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..34db451e6b15b0fb65df6f43310981b2054e5961
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ru",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3714859437751004
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..15edff1b502f490a0ddf77d076e1a636784acc72
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ru",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.4718875502008032
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6f885732db4a74e5e8b5e49028a3cf51f12e9bb4
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ru",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.351004016064257
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f632364facaaeba76d2cc478ef96e24eeb22eed
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ru",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.4666666666666667
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad86ba7b9f7e3afac74291994a33340bd48f788e
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.3795180722891566
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0301d844945575f4d1f3a213f75ce08d3534bbf9
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3273092369477912
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c57a0597b21bc56e21203e1ce01f78cecc509675
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.39759036144578314
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a19fe560e090ef6adcfbb7a36a49af5075787a5a
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3899598393574297
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cecdcc9baf71c9213c2fcbda483992daecff794a
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.40120481927710844
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..dfd55645facdd7f1485b60e33b7f4d68b44c7c44
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.3614457831325301
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5c8817a6751b85aaad915b8f476b8d8f21950ac
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3493975903614458
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..639729d55b17ec6546449d00f324413109017624
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.37309236947791163
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f93f49c833b45918022badf75f5349aaea89f2e4
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3522088353413655
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..22b34ff972cbeb83cce14a02be811c52c21c31db
--- /dev/null
+++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.3755020080321285
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/merged.csv b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.csv
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/merged.csv
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.csv
diff --git a/evaluation_xnlihtmt/xnliht/merged.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/merged.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.json
diff --git a/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json
diff --git a/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/merged.csv b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.csv
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/merged.csv
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.csv
diff --git a/evaluation_xnlihtmt/xnlimt/merged.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/merged.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.json
diff --git a/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json
diff --git a/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json
similarity index 100%
rename from evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/merged.csv b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.csv
similarity index 100%
rename from evaluation_xwinostorycopaht/merged.csv
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.csv
diff --git a/evaluation_xwinostorycopaht/merged.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.json
similarity index 100%
rename from evaluation_xwinostorycopaht/merged.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.json
diff --git a/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json
diff --git a/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json
similarity index 100%
rename from evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/merged.csv b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.csv
similarity index 100%
rename from evaluation_xwinostorycopamt/merged.csv
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.csv
diff --git a/evaluation_xwinostorycopamt/merged.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.json
similarity index 100%
rename from evaluation_xwinostorycopamt/merged.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.json
diff --git a/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json
similarity index 100%
rename from evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json
rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json
diff --git a/logs/logs/main_log.txt b/logs/logs/main_log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..70148d593b0163e069eaf89d0cdd1d5e7b8bc697
--- /dev/null
+++ b/logs/logs/main_log.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:524a637d585f893f1ea43a0f461af047926be408fe9971e14e9d1397d47e32fd
+size 233998372