diff --git a/.gitattributes b/.gitattributes index 1fd4826e6f1aafb9303f7a6f9709083bd5723fc3..af3ee52691bfc0434d58a4fd62e65013132f98a8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -31,3 +31,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text tokenizer.json filter=lfs diff=lfs merge=lfs -text +logs/logs/main_log.txt filter=lfs diff=lfs merge=lfs -text diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json diff --git a/evaluation_l1/anli/dev_r1/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r1/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r1/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r1/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json diff --git a/evaluation_l1/anli/dev_r2/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r2/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r2/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r2/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json diff --git a/evaluation_l1/anli/dev_r3/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json diff --git a/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json diff --git a/evaluation_l1/anli/dev_r3/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json diff --git a/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/anli/dev_r3/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/anli/dev_r3/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json diff --git a/evaluation_l1/merged.csv b/evaluation_bloomz-7b1/evaluation_l1/merged.csv similarity index 100% rename from evaluation_l1/merged.csv rename to evaluation_bloomz-7b1/evaluation_l1/merged.csv diff --git a/evaluation_l1/merged.json b/evaluation_bloomz-7b1/evaluation_l1/merged.json similarity index 100% rename from evaluation_l1/merged.json rename to evaluation_bloomz-7b1/evaluation_l1/merged.json diff --git a/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Answer_Given_options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json diff --git a/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Generate_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json diff --git a/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json similarity index 100% rename from evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json rename to evaluation_bloomz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json diff --git a/evaluation_l1/super_glue/cb/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json diff --git a/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json diff --git a/evaluation_l1/super_glue/cb/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json diff --git a/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/super_glue/cb/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/super_glue/cb/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json diff --git a/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/super_glue/copa/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/best_option/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/best_option/results.json diff --git a/evaluation_l1/super_glue/copa/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json diff --git a/evaluation_l1/super_glue/copa/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json diff --git a/evaluation_l1/super_glue/copa/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/super_glue/copa/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json diff --git a/evaluation_l1/super_glue/rte/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json diff --git a/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json diff --git a/evaluation_l1/super_glue/rte/does_it_follow_that/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/does_it_follow_that/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json diff --git a/evaluation_l1/super_glue/rte/guaranteed_true/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/guaranteed_true/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json diff --git a/evaluation_l1/super_glue/rte/should_assume/results.json b/evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json similarity index 100% rename from evaluation_l1/super_glue/rte/should_assume/results.json rename to evaluation_bloomz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/Replace/results.json rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/stand_for/results.json rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json diff --git a/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json similarity index 100% rename from evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json rename to evaluation_bloomz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json diff --git a/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/id/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/id/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/best_option/results.json diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/id/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json diff --git a/evaluation_l1/xcopa/id/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/id/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/id/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/sw/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/best_option/results.json diff --git a/evaluation_l1/xcopa/sw/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json diff --git a/evaluation_l1/xcopa/sw/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/sw/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/sw/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/ta/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/best_option/results.json diff --git a/evaluation_l1/xcopa/ta/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json diff --git a/evaluation_l1/xcopa/ta/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/ta/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/ta/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/vi/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/best_option/results.json diff --git a/evaluation_l1/xcopa/vi/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json diff --git a/evaluation_l1/xcopa/vi/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/vi/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/vi/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json diff --git a/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json diff --git a/evaluation_l1/xcopa/zh/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/best_option/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/best_option/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/best_option/results.json diff --git a/evaluation_l1/xcopa/zh/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/cause_effect/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json diff --git a/evaluation_l1/xcopa/zh/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/i_am_hesitating/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json similarity index 100% rename from evaluation_l1/xcopa/zh/plausible_alternatives/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json diff --git a/evaluation_l1/xnli/ar/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/ar/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/ar/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/ar/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/ar/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json diff --git a/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/ar/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/ar/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/en/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/en/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/en/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/en/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/en/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json diff --git a/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/en/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/en/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/es/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/es/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/es/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/es/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/es/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/fr/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/fr/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/fr/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/fr/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/fr/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json diff --git a/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/fr/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/fr/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/hi/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/hi/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/hi/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/hi/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/hi/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json diff --git a/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/hi/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/hi/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/sw/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/sw/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/sw/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/sw/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/sw/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json diff --git a/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/sw/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/sw/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/ur/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/ur/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/ur/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/ur/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/ur/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json diff --git a/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/ur/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/ur/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/vi/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/vi/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/vi/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/vi/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/vi/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json diff --git a/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/vi/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/vi/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json diff --git a/evaluation_l1/xnli/zh/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json similarity index 100% rename from evaluation_l1/xnli/zh/GPT-3_style/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json diff --git a/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json similarity index 100% rename from evaluation_l1/xnli/zh/MNLI_crowdsource/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json diff --git a/evaluation_l1/xnli/zh/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json similarity index 100% rename from evaluation_l1/xnli/zh/can_we_infer/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json diff --git a/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json similarity index 100% rename from evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json diff --git a/evaluation_l1/xnli/zh/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json similarity index 100% rename from evaluation_l1/xnli/zh/justified_in_saying/results.json rename to evaluation_bloomz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..270db71333e2302d2d735a59743ac24b7bf0bb67 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.5043017868960953 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2f58482a06656f3d83bdd1459a3e4b5fa534b041 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.49702183984116477 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b44fdf5bbcc99e50426b50ee3fdf4359d927e8ae --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.48510919920582396 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a2245816d77b3cb1cc292336a33f537543d0a39e --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.499669093315685 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8dfc2f00973ecf0e2ba3138bf5c5689bc4c55b59 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/my/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "my", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.49106551952349436 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='my', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9feba995957604f280ccc4aa41222411d06a7949 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.514228987425546 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7f62178f612a0fa09b5fc45e1de0379f5caad4f --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.6532097948378557 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e3623ec577d930051ad134ee13e1a3d36146ec91 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.5043017868960953 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3c71e4a178f58adabfac3da0f00052119f228c13 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.6326935804103243 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0d24e5161eaefd2bce1c1c64bb4193160876ac14 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ru", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.6412971542025149 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d3847738cbb57e5d6214e935f4b98fe598332f8 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.5286757038581856 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..25978d08065d05b4ed3c08d0e362247fa414df42 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.4807090719499479 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b6ee43811f0557fc60576c00e5f0102627f1e918 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.5151199165797706 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e6df08d54b4d45b4d8e65d4ae70690096f1003a9 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.49009384775808135 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..054947deee87d0f73c39c67ddf23a0f7c107eb2b --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/Muennighoff_xwinograd/jp/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "jp", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.5067778936392076 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='jp', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3c622a2f4b0805469dabb3e3d35cbcd2b1c390da --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.5 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..628d54971a15c0589eced916cacfc63d8904303b --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4767de08c02cac8c4af52288d33676cc0ba92927 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b37393b08c237ede5f8fb34f994eabe7b0dc98f --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..59881679f8ab60081cbebc4110761b4ee21b1c7b --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/et/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "et", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.48 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='et', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..28aa7bf010c751795f42cea0423165b411a62934 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.54 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f37f1e77c7a139882c82c798432c717bf007c3c8 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b2b16e769f653357015fd6405fc4d78368ba7777 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc2890be9b75fadd61b053ed1c4c8ee8e3cf92d --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4961c838779c5cc7b8ea1fd9c0a841853ace6bed --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/ht/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ht", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.5 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ht', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cdf7b9f5c80b6f3d693186fe0bccb2a90bf5b78c --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.55 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c9449aadd07b1e44a8e2aa315cbc9a0092d8d79 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7529bd943f58bc39dc5388e96f81cf4506d2e8b2 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.59 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf923da32703426e7b6cbe6724e252ab77426c7e --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.61 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..37c658578827e3dec984106a4b7d592c5565b2d8 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/it/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "it", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.59 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='it', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7939dea9d5d9010c8e472eceed05f2c6def20be7 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.6 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6ae216d40cf90c7946bc4652dfdb50e7fb25e0c --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.45 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..07ada51e14a6f36af0f698a0fa0f68e0c2f0d49a --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.45 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..16536437bfa2633c6118ce692549b74e1ccbec97 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.46 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2b13d3c1de6b6d106a37345220e26a02a81e07b5 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/qu/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "qu", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.43 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='qu', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..61e9d07c4585209995a3740c2e4411cbf964829c --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.49 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..88fdb6bd97a1811c3ec72cbc759b9022be167b8c --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.44 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..194f239c44f068855140863abde58b9b39a71df4 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.47 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ec9beafb9eca360eef44c090721da5d3ef6f8fe3 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.5 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..792f5db5c4aa7a29ae1f23d5690c9e744767f800 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xcopa/tr/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "tr", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.53 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6c364261b08713f364dfbdfae038001e665fba98 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.4353413654618474 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..71f52dc7f359a415e72bc9520a35e5c3aca5057d --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3546184738955823 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d8e7dad665b37fc6643a0782625ea27d24410d0 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.43614457831325304 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c39711da879e496c7e7b2f1e82494c0d9d963413 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3453815261044177 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8baee2830fb3fbe8a2ac7c15776d0dd01ebaa3db --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/bg/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "bg", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.43493975903614457 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='bg', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..61f964663cde8f3765b43f2a2fd0a9e16957d655 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.4682730923694779 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e7408e55827294d04d99b5f27324217c9dbcfba5 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.36626506024096384 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..45350eb47b641f4ff2a60423408e6ff529a287a8 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.45863453815261046 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6d92dbaca1632e6f7a8658665ee0bf0152e990e6 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3746987951807229 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..193b9b43b70acf89516c4cff41282cf25b24c966 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/de/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "de", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4630522088353414 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='de', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0bfc8fd857e78ed120a40ad673548881c4fd3efe --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.41244979919678715 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5dd399e2bd8c1f7fdb043cf4cc4c03071bc7a5ba --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3538152610441767 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2064b26b4f027795986c63bf990dd5458644caa0 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4108433734939759 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c1961705ffb5458181047a2d594bcc4f7bcb2d9d --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.342570281124498 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..843075f08ced825f87faf31a07c4c58fb35f6c53 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/el/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "el", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.41526104417670684 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='el', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ecc109b75b2288c8f6a3b854995ebe40f1ddf20c --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.4775100401606426 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..34db451e6b15b0fb65df6f43310981b2054e5961 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3714859437751004 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..15edff1b502f490a0ddf77d076e1a636784acc72 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4718875502008032 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f885732db4a74e5e8b5e49028a3cf51f12e9bb4 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.351004016064257 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f632364facaaeba76d2cc478ef96e24eeb22eed --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/ru/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ru", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4666666666666667 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ru', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ad86ba7b9f7e3afac74291994a33340bd48f788e --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.3795180722891566 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0301d844945575f4d1f3a213f75ce08d3534bbf9 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3273092369477912 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c57a0597b21bc56e21203e1ce01f78cecc509675 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.39759036144578314 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a19fe560e090ef6adcfbb7a36a49af5075787a5a --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3899598393574297 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cecdcc9baf71c9213c2fcbda483992daecff794a --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/th/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.40120481927710844 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dfd55645facdd7f1485b60e33b7f4d68b44c7c44 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.3614457831325301 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a5c8817a6751b85aaad915b8f476b8d8f21950ac --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3493975903614458 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..639729d55b17ec6546449d00f324413109017624 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.37309236947791163 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f93f49c833b45918022badf75f5349aaea89f2e4 --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3522088353413655 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..22b34ff972cbeb83cce14a02be811c52c21c31db --- /dev/null +++ b/evaluation_bloomz-7b1/evaluation_l2/xnli/tr/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.3755020080321285 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/bloomz-7b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json diff --git a/evaluation_xnlihtmt/xnliht/merged.csv b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.csv similarity index 100% rename from evaluation_xnlihtmt/xnliht/merged.csv rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.csv diff --git a/evaluation_xnlihtmt/xnliht/merged.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/merged.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/merged.json diff --git a/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json diff --git a/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json similarity index 100% rename from evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/merged.csv b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.csv similarity index 100% rename from evaluation_xnlihtmt/xnlimt/merged.csv rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.csv diff --git a/evaluation_xnlihtmt/xnlimt/merged.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/merged.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/merged.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json diff --git a/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json similarity index 100% rename from evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Generate_Ending_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/Replace_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/True_or_False_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/stand_for_zhht/results.json diff --git a/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/Muennighoff_xwinograd/zh/underscore_refer_to_zhht/results.json diff --git a/evaluation_xwinostorycopaht/merged.csv b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.csv similarity index 100% rename from evaluation_xwinostorycopaht/merged.csv rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.csv diff --git a/evaluation_xwinostorycopaht/merged.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.json similarity index 100% rename from evaluation_xwinostorycopaht/merged.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/merged.json diff --git a/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/C1_or_C2?_premise_zhht/results.json diff --git a/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/best_option_zhht/results.json diff --git a/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/cause_effect_zhht/results.json diff --git a/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/i_am_hesitating_zhht/results.json diff --git a/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json similarity index 100% rename from evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopaht/xcopa/zh/plausible_alternatives_zhht/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/merged.csv b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.csv similarity index 100% rename from evaluation_xwinostorycopamt/merged.csv rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.csv diff --git a/evaluation_xwinostorycopamt/merged.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.json similarity index 100% rename from evaluation_xwinostorycopamt/merged.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/merged.json diff --git a/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json similarity index 100% rename from evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json rename to evaluation_bloomz-7b1/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json diff --git a/logs/logs/main_log.txt b/logs/logs/main_log.txt new file mode 100644 index 0000000000000000000000000000000000000000..70148d593b0163e069eaf89d0cdd1d5e7b8bc697 --- /dev/null +++ b/logs/logs/main_log.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524a637d585f893f1ea43a0f461af047926be408fe9971e14e9d1397d47e32fd +size 233998372